From 2b5cc5f27d62a477586efc4f00c7a5027fe3f717 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Tue, 31 Jul 2018 18:12:45 -0700 Subject: [PATCH 01/17] Added advisory message for stack status file --- databricks_cli/stack/api.py | 3 +++ databricks_cli/stack/cli.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index 01fb5836..c71077cf 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -90,6 +90,9 @@ def deploy(self, config_path, **kwargs): os.chdir(cli_dir) click.echo("Saving stack status to {}".format(status_path)) self._save_json(status_path, new_stack_status) + click.echo('Warning- The stack status file is an automatically generated file that is' + ' depended on for the databricks stack CLI to function correctly.' + ' Please do not edit the file.') def download(self, config_path, **kwargs): """ diff --git a/databricks_cli/stack/cli.py b/databricks_cli/stack/cli.py index ad68fb11..8642badc 100644 --- a/databricks_cli/stack/cli.py +++ b/databricks_cli/stack/cli.py @@ -61,7 +61,7 @@ def deploy(api_client, config_path, **kwargs): @provide_api_client def download(api_client, config_path, **kwargs): """ - Download a stack to the databricks workspace given a JSON stack configuration template. + Download a stack to the local filesystem given a JSON stack configuration template. """ click.echo('#' * 80) click.echo('Downloading stack at: {} with options: {}'.format(config_path, kwargs)) From 7ecf7fa1afe1d870276f0d2f108376238eb67799 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Wed, 1 Aug 2018 21:15:14 -0700 Subject: [PATCH 02/17] Added spaces in comments --- databricks_cli/stack/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index c71077cf..0934e4ea 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -352,7 +352,7 @@ def _deploy_workspace(self, resource_properties, physical_id, overwrite): actual_object_type = DIRECTORY if os.path.isdir(local_path) else NOTEBOOK if object_type != actual_object_type: - raise StackError("Field 'object_type' ({}) not consistent" + raise StackError("Field 'object_type' ({}) not consistent " "with actual object type ({})".format(object_type, actual_object_type)) click.echo('Uploading {} from {} to Databricks workspace at {}'.format(object_type, @@ -362,7 +362,7 @@ def _deploy_workspace(self, resource_properties, physical_id, overwrite): # Inference of notebook language and format language_fmt = WorkspaceLanguage.to_language_and_format(local_path) if language_fmt is None: - raise StackError("Workspace notebook language and format cannot be inferred" + raise StackError("Workspace notebook language and format cannot be inferred. " "Please check file extension of notebook file.") language, fmt = language_fmt # Create needed directories in workspace. @@ -404,7 +404,7 @@ def _download_workspace(self, resource_properties, overwrite): # Inference of notebook language and format. A tuple of (language, fmt) or Nonetype. language_fmt = WorkspaceLanguage.to_language_and_format(local_path) if language_fmt is None: - raise StackError("Workspace Notebook language and format cannot be inferred." + raise StackError("Workspace Notebook language and format cannot be inferred. 
" "Please check file extension of notebook 'source_path'.") (_, fmt) = language_fmt local_dir = os.path.dirname(os.path.abspath(local_path)) From 730938bee0c8a6a61be9497ac5a36a22f373a6a1 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 2 Aug 2018 17:19:44 -0700 Subject: [PATCH 03/17] Change interface --- databricks_cli/stack/api.py | 178 ++++++++++++++++++------------------ databricks_cli/stack/cli.py | 66 ++++++++++++- tests/stack/test_api.py | 91 +----------------- tests/stack/test_cli.py | 122 +++++++++++++++++++++--- 4 files changed, 269 insertions(+), 188 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index cb49bf13..254c5890 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -80,54 +80,54 @@ def __init__(self, api_client): self.workspace_client = WorkspaceApi(api_client) self.dbfs_client = DbfsApi(api_client) - def deploy(self, config_path, **kwargs): - """ - Deploys a stack given stack JSON configuration template at path config_path. - - Loads the JSON template as well as status JSON if stack has been deployed before. - - The working directory is changed to that where the JSON template is contained - so that paths within the stack configuration are relative to the directory of the - JSON template instead of the directory where this function is called. - - :param config_path: Path to stack JSON configuration template. Must have the fields of - 'name', the name of the stack and 'resources', a list of stack resources. - :return: None. - """ - stack_config = self._load_json(config_path) - status_path = self._generate_stack_status_path(config_path) - stack_status = self._load_json(status_path) - config_dir = os.path.dirname(os.path.abspath(config_path)) - cli_dir = os.getcwd() - os.chdir(config_dir) # Switch current working directory to where json config is stored - new_stack_status = self.deploy_config(stack_config, stack_status, **kwargs) - os.chdir(cli_dir) - click.echo("Saving stack status to {}".format(status_path)) - self._save_json(status_path, new_stack_status) - click.echo('Warning- The stack status file is an automatically generated file that is' - ' depended on for the databricks stack CLI to function correctly.' - ' Please do not edit the file.') - - def download(self, config_path, **kwargs): - """ - Downloads a stack given stack JSON configuration template at path config_path. - - The working directory is changed to that where the JSON template is contained - so that paths within the stack configuration are relative to the directory of the - JSON template instead of the directory where this function is called. - - :param config_path: Path to stack JSON configuration template. Must have the fields of - 'name', the name of the stack and 'resources', a list of stack resources. - :return: None. - """ - stack_config = self._load_json(config_path) - config_dir = os.path.dirname(os.path.abspath(config_path)) - cli_dir = os.getcwd() - os.chdir(config_dir) # Switch current working directory to where json config is stored - self.download_from_config(stack_config, **kwargs) - os.chdir(cli_dir) - - def deploy_config(self, stack_config, stack_status=None, **kwargs): + # def deploy(self, config_path, **kwargs): + # """ + # Deploys a stack given stack JSON configuration template at path config_path. + # + # Loads the JSON template as well as status JSON if stack has been deployed before. 
+ # + # The working directory is changed to that where the JSON template is contained + # so that paths within the stack configuration are relative to the directory of the + # JSON template instead of the directory where this function is called. + # + # :param config_path: Path to stack JSON configuration template. Must have the fields of + # 'name', the name of the stack and 'resources', a list of stack resources. + # :return: None. + # """ + # stack_config = self._load_json(config_path) + # status_path = self._generate_stack_status_path(config_path) + # stack_status = self._load_json(status_path) + # config_dir = os.path.dirname(os.path.abspath(config_path)) + # cli_dir = os.getcwd() + # os.chdir(config_dir) # Switch current working directory to where json config is stored + # new_stack_status = self.deploy_config(stack_config, stack_status, **kwargs) + # os.chdir(cli_dir) + # click.echo("Saving stack status to {}".format(status_path)) + # self._save_json(status_path, new_stack_status) + # click.echo('Warning- The stack status file is an automatically generated file that is' + # ' depended on for the databricks stack CLI to function correctly.' + # ' Please do not edit the file.') + # + # def download(self, config_path, **kwargs): + # """ + # Downloads a stack given stack JSON configuration template at path config_path. + # + # The working directory is changed to that where the JSON template is contained + # so that paths within the stack configuration are relative to the directory of the + # JSON template instead of the directory where this function is called. + # + # :param config_path: Path to stack JSON configuration template. Must have the fields of + # 'name', the name of the stack and 'resources', a list of stack resources. + # :return: None. + # """ + # stack_config = self._load_json(config_path) + # config_dir = os.path.dirname(os.path.abspath(config_path)) + # cli_dir = os.getcwd() + # os.chdir(config_dir) # Switch current working directory to where json config is stored + # self.download_from_config(stack_config, **kwargs) + # os.chdir(cli_dir) + + def deploy(self, stack_config, stack_status=None, **kwargs): """ Deploys a stack given stack JSON configuration template at path config_path. @@ -175,7 +175,7 @@ def deploy_config(self, stack_config, stack_status=None, **kwargs): return new_stack_status - def download_from_config(self, stack_config, **kwargs): + def download(self, stack_config, **kwargs): """ Downloads a stack given a dict of the stack configuration. :param stack_config: dict of stack configuration. Must contain 'name' and 'resources' field. @@ -599,43 +599,43 @@ def _get_resource_to_status_map(self, stack_status): for resource_status in stack_status.get(STACK_DEPLOYED) } - def _generate_stack_status_path(self, stack_path): - """ - Given a path to the stack configuration template JSON file, generates a path to where the - deployment status JSON will be stored after successful deployment of the stack. - - :param stack_path: Path to the stack config template JSON file - :return: The path to the stack status file. - - >>> self._generate_stack_status_path('./stack.json') - './stack.deployed.json' - """ - stack_status_insert = 'deployed' - stack_path_split = stack_path.split('.') - stack_path_split.insert(-1, stack_status_insert) - return '.'.join(stack_path_split) - - def _load_json(self, path): - """ - Parse a json file to a readable dict format. - Returns an empty dictionary if the path doesn't exist. 
- - :param path: File path of the JSON stack configuration template. - :return: dict of parsed JSON stack config template. - """ - stack_conf = {} - if os.path.exists(path): - with open(path, 'r') as f: - stack_conf = json.load(f) - return stack_conf - - def _save_json(self, path, data): - """ - Writes data to a JSON file. - - :param path: Path of JSON file. - :param data: dict- data that wants to by written to JSON file - :return: None - """ - with open(path, 'w') as f: - json.dump(data, f, indent=2, sort_keys=True) + # def _generate_stack_status_path(self, stack_path): + # """ + # Given a path to the stack configuration template JSON file, generates a path to where the + # deployment status JSON will be stored after successful deployment of the stack. + # + # :param stack_path: Path to the stack config template JSON file + # :return: The path to the stack status file. + # + # >>> self._generate_stack_status_path('./stack.json') + # './stack.deployed.json' + # """ + # stack_status_insert = 'deployed' + # stack_path_split = stack_path.split('.') + # stack_path_split.insert(-1, stack_status_insert) + # return '.'.join(stack_path_split) + # + # def _load_json(self, path): + # """ + # Parse a json file to a readable dict format. + # Returns an empty dictionary if the path doesn't exist. + # + # :param path: File path of the JSON stack configuration template. + # :return: dict of parsed JSON stack config template. + # """ + # stack_conf = {} + # if os.path.exists(path): + # with open(path, 'r') as f: + # stack_conf = json.load(f) + # return stack_conf + # + # def _save_json(self, path, data): + # """ + # Writes data to a JSON file. + # + # :param path: Path of JSON file. + # :param data: dict- data that wants to by written to JSON file + # :return: None + # """ + # with open(path, 'w') as f: + # json.dump(data, f, indent=2, sort_keys=True) diff --git a/databricks_cli/stack/cli.py b/databricks_cli/stack/cli.py index 8642badc..02920bfb 100644 --- a/databricks_cli/stack/cli.py +++ b/databricks_cli/stack/cli.py @@ -20,6 +20,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import os +import json import click @@ -31,6 +33,50 @@ DEBUG_MODE = True +def _generate_stack_status_path(stack_path): + """ + Given a path to the stack configuration template JSON file, generates a path to where the + deployment status JSON will be stored after successful deployment of the stack. + + :param stack_path: Path to the stack config template JSON file + :return: The path to the stack status file. + + >>> self._generate_stack_status_path('./stack.json') + './stack.deployed.json' + """ + stack_status_insert = 'deployed' + stack_path_split = stack_path.split('.') + stack_path_split.insert(-1, stack_status_insert) + return '.'.join(stack_path_split) + + +def _load_json(path): + """ + Parse a json file to a readable dict format. + Returns an empty dictionary if the path doesn't exist. + + :param path: File path of the JSON stack configuration template. + :return: dict of parsed JSON stack config template. + """ + stack_conf = {} + if os.path.exists(path): + with open(path, 'r') as f: + stack_conf = json.load(f) + return stack_conf + + +def _save_json(path, data): + """ + Writes data to a JSON file. + + :param path: Path of JSON file. 
+ :param data: dict- data that wants to by written to JSON file + :return: None + """ + with open(path, 'w') as f: + json.dump(data, f, indent=2, sort_keys=True) + + @click.command(context_settings=CONTEXT_SETTINGS, short_help='Deploy stack given a JSON configuration of the stack') @click.argument('config_path', type=click.Path(exists=True), required=True) @@ -46,6 +92,19 @@ def deploy(api_client, config_path, **kwargs): """ click.echo('#' * 80) click.echo('Deploying stack at: {} with options: {}'.format(config_path, kwargs)) + stack_config = _load_json(config_path) + status_path = _generate_stack_status_path(config_path) + stack_status = _load_json(status_path) + config_dir = os.path.dirname(os.path.abspath(config_path)) + cli_dir = os.getcwd() + os.chdir(config_dir) # Switch current working directory to where json config is stored + new_stack_status = StackApi(api_client).deploy(stack_config, stack_status, **kwargs) + os.chdir(cli_dir) + click.echo("Saving stack status to {}".format(status_path)) + _save_json(status_path, new_stack_status) + click.echo('Warning- The stack status file is an automatically generated file that is' + ' depended on for the databricks stack CLI to function correctly.' + ' Please do not edit the file.') StackApi(api_client).deploy(config_path, **kwargs) click.echo('#' * 80) @@ -65,7 +124,12 @@ def download(api_client, config_path, **kwargs): """ click.echo('#' * 80) click.echo('Downloading stack at: {} with options: {}'.format(config_path, kwargs)) - StackApi(api_client).download(config_path, **kwargs) + stack_config = _load_json(config_path) + config_dir = os.path.dirname(os.path.abspath(config_path)) + cli_dir = os.getcwd() + os.chdir(config_dir) # Switch current working directory to where json config is stored + StackApi(api_client).download(stack_config, **kwargs) + os.chdir(cli_dir) click.echo('#' * 80) diff --git a/tests/stack/test_api.py b/tests/stack/test_api.py index e6ad9d83..7e596e84 100644 --- a/tests/stack/test_api.py +++ b/tests/stack/test_api.py @@ -26,7 +26,6 @@ # pylint:disable=unused-argument import os -import json import copy import mock from requests.exceptions import HTTPError @@ -37,7 +36,6 @@ import databricks_cli.workspace.api as workspace_api from databricks_cli.stack.exceptions import StackError -TEST_STACK_PATH = 'stack/stack.json' TEST_JOB_SETTINGS = { api.JOBS_RESOURCE_NAME: 'test job' } @@ -195,85 +193,6 @@ def stack_api(): class TestStackApi(object): - def test_load_json_config(self, stack_api, tmpdir): - """ - _load_json should read the same JSON content that was originally in the - stack configuration JSON. - """ - stack_path = os.path.join(tmpdir.strpath, TEST_STACK_PATH) - os.makedirs(os.path.dirname(stack_path)) - with open(stack_path, "w+") as f: - json.dump(TEST_STACK, f) - config = stack_api._load_json(stack_path) - assert config == TEST_STACK - - def test_generate_stack_status_path(self, stack_api, tmpdir): - """ - The _generate_stack_status_path should add the word 'deployed' between the json file - extension and the filename of the stack configuration file. 
- """ - config_path = os.path.join(tmpdir.strpath, 'test.json') - expected_status_path = os.path.join(tmpdir.strpath, 'test.deployed.json') - generated_path = stack_api._generate_stack_status_path(config_path) - assert expected_status_path == generated_path - - config_path = os.path.join(tmpdir.strpath, 'test.st-ack.json') - expected_status_path = os.path.join(tmpdir.strpath, 'test.st-ack.deployed.json') - generated_path = stack_api._generate_stack_status_path(config_path) - assert expected_status_path == generated_path - - def test_save_load_stack_status(self, stack_api, tmpdir): - """ - When saving the a stack status through _save_stack_status, it should be able to be - loaded by _load_stack_status and have the same exact contents. - """ - config_path = os.path.join(tmpdir.strpath, 'test.json') - status_path = stack_api._generate_stack_status_path(config_path) - stack_api._save_json(status_path, TEST_STATUS) - - status = stack_api._load_json(status_path) - assert status == TEST_STATUS - - def test_deploy_relative_paths(self, stack_api, tmpdir): - """ - When doing stack_api.deploy, in every call to stack_api._deploy_resource, the current - working directory should be the same directory as where the stack config template is - contained so that relative paths for resources can be relative to the stack config - instead of where CLI calls the API functions. - """ - config_working_dir = os.path.join(tmpdir.strpath, 'stack') - config_path = os.path.join(config_working_dir, 'test.json') - os.makedirs(config_working_dir) - with open(config_path, 'w+') as f: - json.dump(TEST_STACK, f) - - def _deploy_resource(resource, stack_status, **kwargs): - assert os.getcwd() == config_working_dir - return TEST_JOB_STATUS - - stack_api._deploy_resource = mock.Mock(wraps=_deploy_resource) - stack_api.deploy(config_path) - - def test_download_relative_paths(self, stack_api, tmpdir): - """ - When doing stack_api.download, in every call to stack_api._deploy_resource, the current - working directory should be the same directory as where the stack config template is - contained so that relative paths for resources can be relative to the stack config - instead of where CLI calls the API functions. - """ - config_working_dir = os.path.join(tmpdir.strpath, 'stack') - config_path = os.path.join(config_working_dir, 'test.json') - os.makedirs(config_working_dir) - with open(config_path, 'w+') as f: - json.dump(TEST_STACK, f) - - def _download_resource(resource, **kwargs): - assert os.getcwd() == config_working_dir - return TEST_JOB_STATUS - - stack_api._download_resource = mock.Mock(wraps=_download_resource) - stack_api.download(config_path) - def test_deploy_job(self, stack_api): """ stack_api._deploy_job should create a new job when 1) A physical_id is not given and @@ -454,7 +373,7 @@ def test_deploy_resource(self, stack_api): stack_api._deploy_resource should return relevant fields in output if deploy done correctly. """ - # TODO(alinxie) Change this test to directly call stack_api.deploy/stack_api.deploy_config + # TODO(alinxie) Change this test to directly call stack_api.deploy # A job resource should have _deploy_resource call on _deploy_job stack_api._deploy_job = mock.MagicMock() test_job_physical_id = {api.JOBS_RESOURCE_ID: 12345} @@ -627,14 +546,14 @@ def test_deploy_valid_stack_status(self, stack_api, tmpdir): # Run deploy command on stack and validate stack status. If there are no exceptions # related to validation errors for the newly generated stack status, then test # will pass. 
- new_stack_status_1 = stack_api.deploy_config(test_stack) + new_stack_status_1 = stack_api.deploy(test_stack) # Test some extra correctness criteria for the generated stack status. assert new_stack_status_1.get(api.STACK_RESOURCES) == test_stack.get(api.STACK_RESOURCES) for deployed_resource in new_stack_status_1.get(api.STACK_DEPLOYED): assert deployed_resource.get(api.RESOURCE_DEPLOY_OUTPUT) == test_deploy_output # Test that when passing in a status that the new status generated will still be valid. - new_stack_status_2 = stack_api.deploy_config(test_stack, stack_status=TEST_STATUS) + new_stack_status_2 = stack_api.deploy(test_stack, stack_status=TEST_STATUS) # Test some extra correctness criteria for the generated stack status. assert new_stack_status_2.get(api.STACK_RESOURCES) == test_stack.get(api.STACK_RESOURCES) for deployed_resource in new_stack_status_2.get(api.STACK_DEPLOYED): @@ -648,7 +567,7 @@ def test_download(self, stack_api): stack_api.workspace_client.export_workspace = mock.MagicMock() stack_api.workspace_client.export_workspace_dir = mock.MagicMock() - stack_api.download_from_config(TEST_STACK) - stack_api.download_from_config(TEST_STACK, overwrite=True) + stack_api.download(TEST_STACK) + stack_api.download(TEST_STACK, overwrite=True) assert stack_api.workspace_client.export_workspace.call_count == 2 assert stack_api.workspace_client.export_workspace_dir.call_count == 2 diff --git a/tests/stack/test_cli.py b/tests/stack/test_cli.py index 8a32c23d..a21b9fc7 100644 --- a/tests/stack/test_cli.py +++ b/tests/stack/test_cli.py @@ -22,13 +22,31 @@ # limitations under the License. # TODO(alinxie)Write test which deploys a stack and validates the status json. # pylint:disable=redefined-outer-name +# pylint:disable=unused-argument +import os +import json import pytest import mock from click.testing import CliRunner import databricks_cli.stack.cli as cli from tests.utils import provide_conf +from tests.stack.test_api import TEST_STATUS, TEST_STACK + +TEST_STACK_PATH = 'stack/stack.json' + + +def _write_stack(tmpdir): + """ + Utility function to store the stack config in the filesystem and returns the config path. 
+ """ + config_working_dir = os.path.join(tmpdir.strpath, 'stack') + config_path = os.path.join(config_working_dir, 'test.json') + os.makedirs(config_working_dir) + with open(config_path, 'w+') as f: + json.dump(TEST_STACK, f) + return config_path @pytest.fixture() @@ -45,12 +63,12 @@ def test_deploy_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command should call the deploy function of the stack API and pass in possible kwargs into deploy function """ - path = tmpdir.strpath + config_path = _write_stack(tmpdir) stack_api_mock.deploy = mock.MagicMock() runner = CliRunner() - runner.invoke(cli.deploy, ['--overwrite', path]) + runner.invoke(cli.deploy, ['--overwrite', config_path]) stack_api_mock.deploy.assert_called() - assert stack_api_mock.deploy.call_args[0][0] == path + assert stack_api_mock.deploy.call_args[0][0] == TEST_STACK # Check overwrite in kwargs assert stack_api_mock.deploy.call_args[1]['overwrite'] is True @@ -61,12 +79,12 @@ def test_download_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command should call the deploy function of the stack API and pass in possible kwargs into deploy function """ - path = tmpdir.strpath + config_path = _write_stack(tmpdir) stack_api_mock.download = mock.MagicMock() runner = CliRunner() - runner.invoke(cli.download, ['--overwrite', path]) + runner.invoke(cli.download, ['--overwrite', config_path]) stack_api_mock.download.assert_called() - assert stack_api_mock.download.call_args[0][0] == path + assert stack_api_mock.download.call_args[0][0] == TEST_STACK # Check overwrite in kwargs assert stack_api_mock.download.call_args[1]['overwrite'] is True @@ -77,12 +95,12 @@ def test_deploy_default_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command without flags should call the deploy function of the stack API and pass in default kwargs into deploy function """ - path = tmpdir.strpath + config_path = _write_stack(tmpdir) stack_api_mock.deploy = mock.MagicMock() runner = CliRunner() - runner.invoke(cli.deploy, [path]) + runner.invoke(cli.deploy, [config_path]) stack_api_mock.deploy.assert_called() - assert stack_api_mock.deploy.call_args[0][0] == path + assert stack_api_mock.deploy.call_args[0][0] == TEST_STACK # Check overwrite in kwargs assert stack_api_mock.deploy.call_args[1]['overwrite'] is False @@ -93,11 +111,91 @@ def test_download_default_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command should call the deploy function of the stack API and pass in possible kwargs into deploy function """ - path = tmpdir.strpath + config_path = _write_stack(tmpdir) stack_api_mock.download = mock.MagicMock() runner = CliRunner() - runner.invoke(cli.download, [path]) + runner.invoke(cli.download, [config_path]) stack_api_mock.download.assert_called() - assert stack_api_mock.download.call_args[0][0] == path + assert stack_api_mock.download.call_args[0][0] == TEST_STACK # Check overwrite in kwargs assert stack_api_mock.download.call_args[1]['overwrite'] is False + + +@provide_conf +def test_deploy_relative_paths(stack_api_mock, tmpdir): + """ + When cli.deploy calls on StackApi.deploy, the current + working directory should be the same directory as where the stack config template is + contained so that relative paths for resources can be relative to the stack config + instead of where CLI calls the API functions. 
+ """ + config_path = _write_stack(tmpdir) + config_working_dir = os.path.dirname(config_path) + + def _deploy(*args, **kwargs): + assert os.getcwd() == config_working_dir + + stack_api_mock.deploy = mock.Mock(wraps=_deploy) + runner = CliRunner() + runner.invoke(cli.deploy, [config_path]) + + +@provide_conf +def test_download_relative_paths(stack_api_mock, tmpdir): + """ + When cli.download calls on StackApi.download, the current working + working directory should be the same directory as where the stack config template is + contained so that relative paths for resources can be relative to the stack config + instead of where CLI calls the API functions. + """ + config_path = _write_stack(tmpdir) + config_working_dir = os.path.dirname(config_path) + + def _download(*args, **kwargs): + assert os.getcwd() == config_working_dir + + stack_api_mock.download = mock.Mock(wraps=_download) + runner = CliRunner() + runner.invoke(cli.download, [config_path]) + + +def test_load_json_config(tmpdir): + """ + _load_json should read the same JSON content that was originally in the + stack configuration JSON. + """ + stack_path = os.path.join(tmpdir.strpath, TEST_STACK_PATH) + os.makedirs(os.path.dirname(stack_path)) + with open(stack_path, "w+") as f: + json.dump(TEST_STACK, f) + config = cli._load_json(stack_path) + assert config == TEST_STACK + + +def test_generate_stack_status_path(tmpdir): + """ + The _generate_stack_status_path should add the word 'deployed' between the json file + extension and the filename of the stack configuration file. + """ + config_path = os.path.join(tmpdir.strpath, 'test.json') + expected_status_path = os.path.join(tmpdir.strpath, 'test.deployed.json') + generated_path = cli._generate_stack_status_path(config_path) + assert expected_status_path == generated_path + + config_path = os.path.join(tmpdir.strpath, 'test.st-ack.json') + expected_status_path = os.path.join(tmpdir.strpath, 'test.st-ack.deployed.json') + generated_path = cli._generate_stack_status_path(config_path) + assert expected_status_path == generated_path + + +def test_save_load_stack_status(tmpdir): + """ + When saving the a stack status through _save_json, it should be able to be + loaded by _load_json and have the same exact contents. + """ + config_path = os.path.join(tmpdir.strpath, 'test.json') + status_path = cli._generate_stack_status_path(config_path) + cli._save_json(status_path, TEST_STATUS) + + status = cli._load_json(status_path) + assert status == TEST_STATUS From 873715e4e8ce3070d073703c977dbd4680b955e3 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 2 Aug 2018 17:43:51 -0700 Subject: [PATCH 04/17] Removed comments --- databricks_cli/stack/api.py | 88 ------------------------------------- 1 file changed, 88 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index 254c5890..f6e0eac8 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -80,53 +80,6 @@ def __init__(self, api_client): self.workspace_client = WorkspaceApi(api_client) self.dbfs_client = DbfsApi(api_client) - # def deploy(self, config_path, **kwargs): - # """ - # Deploys a stack given stack JSON configuration template at path config_path. - # - # Loads the JSON template as well as status JSON if stack has been deployed before. - # - # The working directory is changed to that where the JSON template is contained - # so that paths within the stack configuration are relative to the directory of the - # JSON template instead of the directory where this function is called. 
- # - # :param config_path: Path to stack JSON configuration template. Must have the fields of - # 'name', the name of the stack and 'resources', a list of stack resources. - # :return: None. - # """ - # stack_config = self._load_json(config_path) - # status_path = self._generate_stack_status_path(config_path) - # stack_status = self._load_json(status_path) - # config_dir = os.path.dirname(os.path.abspath(config_path)) - # cli_dir = os.getcwd() - # os.chdir(config_dir) # Switch current working directory to where json config is stored - # new_stack_status = self.deploy_config(stack_config, stack_status, **kwargs) - # os.chdir(cli_dir) - # click.echo("Saving stack status to {}".format(status_path)) - # self._save_json(status_path, new_stack_status) - # click.echo('Warning- The stack status file is an automatically generated file that is' - # ' depended on for the databricks stack CLI to function correctly.' - # ' Please do not edit the file.') - # - # def download(self, config_path, **kwargs): - # """ - # Downloads a stack given stack JSON configuration template at path config_path. - # - # The working directory is changed to that where the JSON template is contained - # so that paths within the stack configuration are relative to the directory of the - # JSON template instead of the directory where this function is called. - # - # :param config_path: Path to stack JSON configuration template. Must have the fields of - # 'name', the name of the stack and 'resources', a list of stack resources. - # :return: None. - # """ - # stack_config = self._load_json(config_path) - # config_dir = os.path.dirname(os.path.abspath(config_path)) - # cli_dir = os.getcwd() - # os.chdir(config_dir) # Switch current working directory to where json config is stored - # self.download_from_config(stack_config, **kwargs) - # os.chdir(cli_dir) - def deploy(self, stack_config, stack_status=None, **kwargs): """ Deploys a stack given stack JSON configuration template at path config_path. @@ -598,44 +551,3 @@ def _get_resource_to_status_map(self, stack_status): resource_status for resource_status in stack_status.get(STACK_DEPLOYED) } - - # def _generate_stack_status_path(self, stack_path): - # """ - # Given a path to the stack configuration template JSON file, generates a path to where the - # deployment status JSON will be stored after successful deployment of the stack. - # - # :param stack_path: Path to the stack config template JSON file - # :return: The path to the stack status file. - # - # >>> self._generate_stack_status_path('./stack.json') - # './stack.deployed.json' - # """ - # stack_status_insert = 'deployed' - # stack_path_split = stack_path.split('.') - # stack_path_split.insert(-1, stack_status_insert) - # return '.'.join(stack_path_split) - # - # def _load_json(self, path): - # """ - # Parse a json file to a readable dict format. - # Returns an empty dictionary if the path doesn't exist. - # - # :param path: File path of the JSON stack configuration template. - # :return: dict of parsed JSON stack config template. - # """ - # stack_conf = {} - # if os.path.exists(path): - # with open(path, 'r') as f: - # stack_conf = json.load(f) - # return stack_conf - # - # def _save_json(self, path, data): - # """ - # Writes data to a JSON file. - # - # :param path: Path of JSON file. 
- # :param data: dict- data that wants to by written to JSON file - # :return: None - # """ - # with open(path, 'w') as f: - # json.dump(data, f, indent=2, sort_keys=True) From de98093c0d11e8b49c6532b310d313bb0919e72b Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 2 Aug 2018 17:52:32 -0700 Subject: [PATCH 05/17] Little comment change --- databricks_cli/stack/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index f6e0eac8..a53468e6 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -316,7 +316,7 @@ def _deploy_workspace(self, resource_properties, physical_id, overwrite): actual_object_type = DIRECTORY if os.path.isdir(local_path) else NOTEBOOK if object_type != actual_object_type: - raise StackError("Field '{}' ({}) not consistent" + raise StackError("Field '{}' ({}) not consistent " "with actual object type ({})".format(WORKSPACE_RESOURCE_OBJECT_TYPE, object_type, actual_object_type)) From 8056e20df60db6f588f3ecafe4387e5f5e74610d Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 2 Aug 2018 18:03:30 -0700 Subject: [PATCH 06/17] Flip ' comments to " comments to be more consistent with JSON --- databricks_cli/stack/api.py | 48 ++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index a53468e6..a16a5474 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -167,13 +167,13 @@ def _deploy_resource(self, resource_config, resource_status=None, **kwargs): physical_id = resource_status.get(RESOURCE_PHYSICAL_ID) if resource_status else None if resource_service == JOBS_SERVICE: - click.echo("Deploying job '{}' with properties: \n{}".format(resource_id, json.dumps( + click.echo('Deploying job "{}" with properties: \n{}'.format(resource_id, json.dumps( resource_properties, indent=2, separators=(',', ': ')))) new_physical_id, deploy_output = self._deploy_job(resource_properties, physical_id) elif resource_service == WORKSPACE_SERVICE: click.echo( - "Deploying workspace asset '{}' with properties \n{}" + 'Deploying workspace asset "{}" with properties \n{}' .format( resource_id, json.dumps(resource_properties, indent=2, separators=(',', ': ')) ) @@ -184,7 +184,7 @@ def _deploy_resource(self, resource_config, resource_status=None, **kwargs): overwrite) elif resource_service == DBFS_SERVICE: click.echo( - "Deploying DBFS asset '{}' with properties \n{}".format( + 'Deploying DBFS asset "{}" with properties \n{}'.format( resource_id, json.dumps(resource_properties, indent=2, separators=(',', ': ')) ) ) @@ -193,7 +193,7 @@ def _deploy_resource(self, resource_config, resource_status=None, **kwargs): physical_id, overwrite) else: - raise StackError("Resource service '{}' not supported".format(resource_service)) + raise StackError('Resource service "{}" not supported'.format(resource_service)) new_resource_status = {RESOURCE_ID: resource_id, RESOURCE_SERVICE: resource_service, @@ -219,7 +219,7 @@ def _download_resource(self, resource_config, **kwargs): if resource_service == WORKSPACE_SERVICE: click.echo( - "Downloading workspace asset '{}' with properties \n{}" + 'Downloading workspace asset "{}" with properties \n{}' .format( resource_id, json.dumps(resource_properties, indent=2, separators=(',', ': ')) ) @@ -227,8 +227,8 @@ def _download_resource(self, resource_config, **kwargs): overwrite = kwargs.get('overwrite', False) 
self._download_workspace(resource_properties, overwrite) else: - click.echo("Resource service '{}' not supported for download. " - "skipping.".format(resource_service)) + click.echo('Resource service "{}" not supported for download. ' + 'skipping.'.format(resource_service)) def _deploy_job(self, resource_properties, physical_id=None): """ @@ -269,15 +269,15 @@ def _put_job(self, job_settings): job_name = job_settings.get(JOBS_RESOURCE_NAME) jobs_same_name = self.jobs_client._list_jobs_by_name(job_name) if len(jobs_same_name) > 1: - raise StackError("Multiple jobs with the same name '{}' already exist, aborting" - " stack deployment".format(job_name)) + raise StackError('Multiple jobs with the same name "{}" already exist, aborting' + ' stack deployment'.format(job_name)) elif len(jobs_same_name) == 1: existing_job = jobs_same_name[0] creator_name = existing_job.get('creator_user_name') timestamp = existing_job.get('created_time') / MS_SEC # Convert to readable date. date_created = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') - click.echo("Warning: Job exists with same name '{}' created by {} on {}. Job will " - "be overwritten".format(job_name, creator_name, date_created)) + click.echo('Warning: Job exists with same name "{}" created by {} on {}. Job will ' + 'be overwritten'.format(job_name, creator_name, date_created)) # Calling jobs_client.reset_job directly so as to not call same level function. self.jobs_client.reset_job({'job_id': existing_job.get('job_id'), 'new_settings': job_settings}) @@ -316,8 +316,8 @@ def _deploy_workspace(self, resource_properties, physical_id, overwrite): actual_object_type = DIRECTORY if os.path.isdir(local_path) else NOTEBOOK if object_type != actual_object_type: - raise StackError("Field '{}' ({}) not consistent " - "with actual object type ({})".format(WORKSPACE_RESOURCE_OBJECT_TYPE, + raise StackError('Field "{}" ({}) not consistent ' + 'with actual object type ({})'.format(WORKSPACE_RESOURCE_OBJECT_TYPE, object_type, actual_object_type)) @@ -379,7 +379,7 @@ def _download_workspace(self, resource_properties, overwrite): elif object_type == DIRECTORY: self.workspace_client.export_workspace_dir(workspace_path, local_path, overwrite) else: - raise StackError("Invalid value for '{}' field: {}" + raise StackError('Invalid value for "{}" field: {}' .format(WORKSPACE_RESOURCE_OBJECT_TYPE, object_type)) def _deploy_dbfs(self, resource_properties, physical_id, overwrite): @@ -404,8 +404,8 @@ def _deploy_dbfs(self, resource_properties, physical_id, overwrite): if is_dir != os.path.isdir(local_path): dir_or_file = 'directory' if os.path.isdir(local_path) else 'file' - raise StackError("local source_path '{}' is found to be a {}, but is not specified" - " as one with is_dir: {}." + raise StackError('local source_path "{}" is found to be a {}, but is not specified' + ' as one with is_dir: {}.' 
.format(local_path, dir_or_file, str(is_dir).lower())) if is_dir: click.echo('Uploading directory from {} to DBFS at {}'.format(local_path, dbfs_path)) @@ -445,7 +445,7 @@ def _validate_config(self, stack_config): # Error on duplicate resource ID's if resource_id in seen_resource_ids: - raise StackError("Duplicate resource ID '{}' found, please resolve.".format( + raise StackError('Duplicate resource ID "{}" found, please resolve.'.format( resource_id)) seen_resource_ids.add(resource_id) @@ -472,20 +472,20 @@ def _validate_config(self, stack_config): self._assert_field_in_resource_properties(DBFS_RESOURCE_IS_DIR, resource_properties, DBFS_SERVICE, resource_id) else: - raise StackError("Resource service '{}' not supported".format(resource_service)) + raise StackError('Resource service "{}" not supported'.format(resource_service)) def _assert_field_in_resource_properties(self, field, properties, service, resource_id): if field not in properties: - raise StackError('"{}" doesn\'t exist in "{}" of {} resource with ID "{}"' + raise StackError('"{}" not in "{}" of {} resource with ID "{}"' .format(field, RESOURCE_PROPERTIES, service, resource_id)) def _assert_field_in_resource(self, field, resource): if field not in resource: - raise StackError("{} doesn't exist in resource config".format(field)) + raise StackError('"{}" not in resource config'.format(field)) def _assert_field_in_stack_config(self, field, stack_config): if field not in stack_config: - raise StackError("'{}' not in stack config".format(field)) + raise StackError('"{}" not in stack config'.format(field)) def _validate_status(self, stack_status): """ @@ -527,16 +527,16 @@ def _validate_status(self, stack_status): def _assert_field_in_resource_physical_id(self, field, physical_id, service, resource_id): if field not in physical_id: - raise StackError('"{}" doesn\'t exist in "{}" of {} resource status with ID "{}"' + raise StackError('"{}" not in "{}" of {} resource status with ID "{}"' .format(field, RESOURCE_PHYSICAL_ID, service, resource_id)) def _assert_field_in_resource_status(self, field, resource_status): if field not in resource_status: - raise StackError('"{}" doesn\'t exist in resource status'.format(field)) + raise StackError('"{}" not in resource status'.format(field)) def _assert_field_in_stack_status(self, field, properties): if field not in properties: - raise StackError("'{}' not in stack status".format(field)) + raise StackError('"{}" not in stack status'.format(field)) def _get_resource_to_status_map(self, stack_status): """ From fcbf3fc04ab2fbc4513693f54d708bb90791fad2 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 2 Aug 2018 18:09:17 -0700 Subject: [PATCH 07/17] fix bug --- databricks_cli/stack/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/databricks_cli/stack/cli.py b/databricks_cli/stack/cli.py index 02920bfb..70e1a8ac 100644 --- a/databricks_cli/stack/cli.py +++ b/databricks_cli/stack/cli.py @@ -99,13 +99,13 @@ def deploy(api_client, config_path, **kwargs): cli_dir = os.getcwd() os.chdir(config_dir) # Switch current working directory to where json config is stored new_stack_status = StackApi(api_client).deploy(stack_config, stack_status, **kwargs) + click.echo('#' * 80) os.chdir(cli_dir) click.echo("Saving stack status to {}".format(status_path)) _save_json(status_path, new_stack_status) click.echo('Warning- The stack status file is an automatically generated file that is' ' depended on for the databricks stack CLI to function correctly.' 
' Please do not edit the file.') - StackApi(api_client).deploy(config_path, **kwargs) click.echo('#' * 80) From b840922c115818733cf7138bb99499d26761b98f Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Thu, 2 Aug 2018 18:32:29 -0700 Subject: [PATCH 08/17] Change Job ID global variable usage --- databricks_cli/stack/api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index a16a5474..8eeef191 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -247,12 +247,12 @@ def _deploy_job(self, resource_properties, physical_id=None): job_settings = resource_properties # resource_properties of jobs are solely job settings. if physical_id: - job_id = physical_id.get('job_id') + job_id = physical_id.get(JOBS_RESOURCE_ID) self._update_job(job_settings, job_id) else: job_id = self._put_job(job_settings) click.echo("Job deployed on Databricks with Job ID {}".format(job_id)) - physical_id = {'job_id': job_id} + physical_id = {JOBS_RESOURCE_ID: job_id} deploy_output = self.jobs_client.get_job(job_id) return physical_id, deploy_output @@ -294,7 +294,7 @@ def _update_job(self, job_settings, job_id): :param job_id: physical job_id of job in databricks server. """ - self.jobs_client.reset_job({JOBS_RESOURCE_ID: job_id, 'new_settings': job_settings}) + self.jobs_client.reset_job({'job_id': job_id, 'new_settings': job_settings}) def _deploy_workspace(self, resource_properties, physical_id, overwrite): """ From 2b1676d9c77cbac0901e73a0ddb3c707ba8a8814 Mon Sep 17 00:00:00 2001 From: Diego Rabatone Oliveira Date: Thu, 2 Aug 2018 23:49:46 -0300 Subject: [PATCH 09/17] Add groups SDK to the CLI. (#114) * Add tests for GROUPS api * Add GroupsAPI and CLI for GroupsAPI --- databricks_cli/cli.py | 2 + databricks_cli/groups/__init__.py | 0 databricks_cli/groups/api.py | 101 +++++++++++++++++ databricks_cli/groups/cli.py | 167 +++++++++++++++++++++++++++ databricks_cli/groups/exceptions.py | 26 +++++ tests/groups/__init__.py | 22 ++++ tests/groups/test_cli.py | 168 ++++++++++++++++++++++++++++ 7 files changed, 486 insertions(+) create mode 100644 databricks_cli/groups/__init__.py create mode 100644 databricks_cli/groups/api.py create mode 100644 databricks_cli/groups/cli.py create mode 100644 databricks_cli/groups/exceptions.py create mode 100644 tests/groups/__init__.py create mode 100644 tests/groups/test_cli.py diff --git a/databricks_cli/cli.py b/databricks_cli/cli.py index fa73ccb2..ee3a0091 100644 --- a/databricks_cli/cli.py +++ b/databricks_cli/cli.py @@ -35,6 +35,7 @@ from databricks_cli.runs.cli import runs_group from databricks_cli.secrets.cli import secrets_group from databricks_cli.stack.cli import stack_group +from databricks_cli.groups.cli import groups_group @click.group(context_settings=CONTEXT_SETTINGS) @@ -55,3 +56,4 @@ def cli(): cli.add_command(libraries_group, name='libraries') cli.add_command(secrets_group, name='secrets') cli.add_command(stack_group, name='stack') +cli.add_command(groups_group, name='groups') diff --git a/databricks_cli/groups/__init__.py b/databricks_cli/groups/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/databricks_cli/groups/api.py b/databricks_cli/groups/api.py new file mode 100644 index 00000000..4169056f --- /dev/null +++ b/databricks_cli/groups/api.py @@ -0,0 +1,101 @@ +"""Implement Databricks Groups API, interfacing with the GroupsService.""" +# Databricks CLI +# Copyright 2017 Databricks, Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from databricks_cli.sdk import GroupsService +from databricks_cli.groups.exceptions import GroupsException + + +class GroupsApi(object): + """Implement the databricks '2.0/groups' API Interface.""" + + def __init__(self, api_client): + self.client = GroupsService(api_client) + + def add_member(self, parent_name, member_type, member_name): + """Add a user or group to a group. + + member_type is either 'group' or 'user'. + member_name is the name of the member. + """ + if member_type == "group": + return self.client.add_to_group(parent_name=parent_name, + group_name=member_name) + elif member_type == "user": + return self.client.add_to_group(parent_name=parent_name, + user_name=member_name) + else: + raise GroupsException("Invalid 'member_type' {}".format( + member_type + )) + + def create(self, group_name): + """Create a new group with the given name.""" + return self.client.create_group(group_name) + + def list_members(self, group_name): + """Return all of the members of a particular group.""" + return self.client.get_group_members(group_name) + + def list_all(self): + """Return all of the groups in an organization.""" + return self.client.get_groups() + + def list_parents(self, member_type, member_name): + """Retrieve all groups in which a given user or group is a member. + + member_type is either 'group' or 'user'. + member_name is the name of the member. + + Note: this method is non-recursive - it will return all groups in + which the given user or group is a member but not the groups in which + those groups are members). + """ + if member_type == "group": + return self.client.get_groups_for_principal(group_name=member_name) + elif member_type == "user": + return self.client.get_groups_for_principal(user_name=member_name) + else: + raise GroupsException("Invalid 'member_type' {}".format( + member_type + )) + + def remove_member(self, parent_name, member_type, member_name): + """Remove a user or group from a group. + + member_type is either 'group' or 'user'. + member_name is the name of the member. 
+ """ + if member_type == "group": + return self.client.remove_from_group(parent_name=parent_name, + group_name=member_name) + elif member_type == "user": + return self.client.remove_from_group(parent_name=parent_name, + user_name=member_name) + else: + raise GroupsException("Invalid 'member_type' {}".format( + member_type + )) + + def delete(self, group_name): + """Remove a group from this organization.""" + return self.client.remove_group(group_name) diff --git a/databricks_cli/groups/cli.py b/databricks_cli/groups/cli.py new file mode 100644 index 00000000..478c9675 --- /dev/null +++ b/databricks_cli/groups/cli.py @@ -0,0 +1,167 @@ +"""Provide the API methods for GROUPs Databricks REST endpoint.""" +# Databricks CLI +# Copyright 2017 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import click + +from databricks_cli.groups.api import GroupsApi +from databricks_cli.utils import eat_exceptions, CONTEXT_SETTINGS, pretty_format +from databricks_cli.configure.config import provide_api_client, profile_option +from databricks_cli.version import print_version_callback, version + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Add an existing group to another existing group.") +@click.option("--parent-name", required=True, + help=("Name of the parent group to which the new member will be " + " added. 
This field is required.")) +@click.option("--user-name", required=False, + help="If user_name, the user name.") +@click.option("--group-name", required=False, + help="If group_name, the group name.") +@profile_option +@eat_exceptions +@provide_api_client +def add_member_cli(api_client, parent_name, user_name, group_name): + """Add a user or group to a group.""" + member_type = None + member_name = None + if user_name: + member_type = "user" + member_name = user_name + elif group_name: + member_type = "group" + member_name = group_name + GroupsApi(api_client).add_member(parent_name, member_type, member_name) + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Create a new group with the given name.") +@click.option("--group-name", required=True) +@profile_option +@eat_exceptions +@provide_api_client +def create_cli(api_client, group_name): + """Create a new group with the given name.""" + content = GroupsApi(api_client).create(group_name) + click.echo(pretty_format(content)) + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Return all of the members of a particular group.") +@click.option("--group-name", required=True) +@profile_option +@eat_exceptions +@provide_api_client +def list_members_cli(api_client, group_name): + """Return all of the members of a particular group.""" + content = GroupsApi(api_client).list_members(group_name) + click.echo(pretty_format(content)) + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Return all of the groups in an organization.") +@profile_option +@eat_exceptions +@provide_api_client +def list_all_cli(api_client): + """Return all of the groups in an organization.""" + content = GroupsApi(api_client).list_all() + click.echo(pretty_format(content)) + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Retrieve all groups in which a given user or group is a member.") +@click.option("--user-name", required=False) +@click.option("--group-name", required=False) +@profile_option +@eat_exceptions +@provide_api_client +def list_parents_cli(api_client, user_name=None, group_name=None): + """Retrieve all groups in which a given user or group is a member.""" + member_type = None + member_name = None + if user_name: + member_type = "user" + member_name = user_name + elif group_name: + member_type = "group" + member_name = group_name + content = GroupsApi(api_client).list_parents(member_type, member_name) + click.echo(pretty_format(content)) + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Removes a user or group from a group.") +@click.option("--parent-name", required=True) +@click.option("--user-name", required=False) +@click.option("--group-name", required=False) +@profile_option +@eat_exceptions +@provide_api_client +def remove_member_cli(api_client, parent_name, user_name=None, + group_name=None): + """Remove a user or group from a group.""" + member_type = None + member_name = None + if user_name: + member_type = "user" + member_name = user_name + elif group_name: + member_type = "group" + member_name = group_name + GroupsApi(api_client).remove_member(parent_name=parent_name, + member_type=member_type, + member_name=member_name) + + +@click.command(context_settings=CONTEXT_SETTINGS, + short_help="Remove a group from this organization.") +@click.option("--group-name", required=False) +@profile_option +@eat_exceptions +@provide_api_client +def delete_cli(api_client, group_name): + """Remove a group from this organization.""" + content = GroupsApi(api_client).delete(group_name) 
+ click.echo(pretty_format(content)) + + +@click.group(context_settings=CONTEXT_SETTINGS, + short_help="Utility to interact with Databricks groups.") +@click.option("--version", "-v", is_flag=True, callback=print_version_callback, + expose_value=False, is_eager=True, help=version) +@profile_option +@eat_exceptions +def groups_group(): + """Provide utility to interact with Databricks groups.""" + pass + + +groups_group.add_command(add_member_cli, name="add-member") +groups_group.add_command(create_cli, name="create") +groups_group.add_command(list_members_cli, name="list-members") +groups_group.add_command(list_all_cli, name="list") +groups_group.add_command(list_parents_cli, name="list-parents") +groups_group.add_command(remove_member_cli, name="remove-member") +groups_group.add_command(delete_cli, name="delete") diff --git a/databricks_cli/groups/exceptions.py b/databricks_cli/groups/exceptions.py new file mode 100644 index 00000000..f4e44384 --- /dev/null +++ b/databricks_cli/groups/exceptions.py @@ -0,0 +1,26 @@ +# Databricks CLI +# Copyright 2018 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class GroupsException(Exception): + pass diff --git a/tests/groups/__init__.py b/tests/groups/__init__.py new file mode 100644 index 00000000..b0c9feac --- /dev/null +++ b/tests/groups/__init__.py @@ -0,0 +1,22 @@ +# Databricks CLI +# Copyright 2017 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
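The click commands registered above map one-to-one onto the new GroupsApi wrapper, so the API can also be driven directly from Python. The following is a rough usage sketch, not part of this patch: the group name, user name, workspace URL, and token are placeholders, and the ApiClient construction is assumed from the existing CLI codebase rather than taken from this change.

    # Illustrative sketch only: exercising the new GroupsApi wrapper directly,
    # mirroring the click commands defined in databricks_cli/groups/cli.py.
    from databricks_cli.sdk.api_client import ApiClient
    from databricks_cli.groups.api import GroupsApi

    client = ApiClient(host='https://<workspace-url>', token='<personal-access-token>')  # placeholders
    groups = GroupsApi(client)

    groups.create('data-engineers')                                    # create a new group
    groups.add_member('data-engineers', 'user', 'jane@example.com')    # add a user to it
    print(groups.list_members('data-engineers'))                       # members of the group
    print(groups.list_parents('user', 'jane@example.com'))             # groups the user belongs to
    groups.remove_member('data-engineers', 'user', 'jane@example.com')
    groups.delete('data-engineers')

The member_type argument is restricted to 'user' or 'group'; anything else raises GroupsException, as shown in the api.py hunks above.
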
diff --git a/tests/groups/test_cli.py b/tests/groups/test_cli.py new file mode 100644 index 00000000..71fcfcad --- /dev/null +++ b/tests/groups/test_cli.py @@ -0,0 +1,168 @@ +# Databricks CLI +# Copyright 2017 Databricks, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"), except +# that the use of services to which certain application programming +# interfaces (each, an "API") connect requires that the user first obtain +# a license for the use of the APIs from Databricks, Inc. ("Databricks"), +# by creating an account at www.databricks.com and agreeing to either (a) +# the Community Edition Terms of Service, (b) the Databricks Terms of +# Service, or (c) another written agreement between Licensee and Databricks +# for the use of the APIs. +# +# You may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint:disable=redefined-outer-name + +import mock +import pytest +from click.testing import CliRunner + +from databricks_cli.groups import cli +from databricks_cli.utils import pretty_format +from tests.utils import provide_conf + +CREATE_RETURN = {"group_id": "test"} +TEST_GROUP = "test_group" +TEST_PARENT_GROUP = "test_parent_group" +TEST_USER = "test_user" +GROUPS = { + "group_names": [ + TEST_GROUP, + TEST_PARENT_GROUP + ] +} +GROUP_MEMBERS = { + "members": [ + { + "user_name": TEST_USER + } + ] +} +GROUP_PARENTS = { + "group_names": [ + TEST_PARENT_GROUP + ] +} + + +@pytest.fixture() +def group_api_mock(): + with mock.patch('databricks_cli.groups.cli.GroupsApi') as mocked: + _group_api_mock = mock.MagicMock() + mocked.return_value = _group_api_mock + yield _group_api_mock + + +@provide_conf +def test_add_user_to_group(group_api_mock): + runner = CliRunner() + runner.invoke(cli.add_member_cli, + ["--parent-name", TEST_PARENT_GROUP, + "--user-name", TEST_USER]) + group_api_mock.add_member.assert_called_once_with( + TEST_PARENT_GROUP, + 'user', + TEST_USER + ) + + +@provide_conf +def test_add_group_to_parent_group(group_api_mock): + runner = CliRunner() + runner.invoke(cli.add_member_cli, + ["--parent-name", TEST_PARENT_GROUP, + "--group-name", TEST_GROUP]) + group_api_mock.add_member.assert_called_once_with( + TEST_PARENT_GROUP, + 'group', + TEST_GROUP + ) + + +@provide_conf +def test_create_cli(group_api_mock): + runner = CliRunner() + runner.invoke(cli.create_cli, ["--group-name", TEST_GROUP]) + group_api_mock.create.assert_called_once_with(TEST_GROUP) + + +@provide_conf +def test_list_members_cli(group_api_mock): + with mock.patch('databricks_cli.groups.cli.click.echo') as echo_mock: + group_api_mock.list_members.return_value = GROUP_MEMBERS + runner = CliRunner() + runner.invoke(cli.list_members_cli, ["--group-name", TEST_GROUP]) + group_api_mock.list_members.assert_called_once_with(TEST_GROUP) + echo_mock.assert_called_once_with(pretty_format(GROUP_MEMBERS)) + + +@provide_conf +def test_list_all_cli(group_api_mock): + with mock.patch('databricks_cli.groups.cli.click.echo') as echo_mock: + group_api_mock.list_all.return_value = GROUPS + runner = CliRunner() + runner.invoke(cli.list_all_cli) + group_api_mock.list_all.assert_called_once() + 
echo_mock.assert_called_once_with(pretty_format(GROUPS)) + + +@provide_conf +def test_list_group_parents_cli(group_api_mock): + with mock.patch('databricks_cli.groups.cli.click.echo') as echo_mock: + group_api_mock.list_parents.return_value = GROUP_PARENTS + runner = CliRunner() + runner.invoke(cli.list_parents_cli, ['--group-name', TEST_GROUP]) + group_api_mock.list_parents.assert_called_once_with('group', + TEST_GROUP) + echo_mock.assert_called_once_with(pretty_format(GROUP_PARENTS)) + + +@provide_conf +def test_list_user_groups_cli(group_api_mock): + with mock.patch('databricks_cli.groups.cli.click.echo') as echo_mock: + group_api_mock.list_parents.return_value = GROUPS + runner = CliRunner() + runner.invoke(cli.list_parents_cli, ['--user-name', TEST_USER]) + group_api_mock.list_parents.assert_called_once_with('user', TEST_USER) + echo_mock.assert_called_once_with(pretty_format(GROUPS)) + + +@provide_conf +def test_remove_user_from_group_cli(group_api_mock): + runner = CliRunner() + runner.invoke(cli.remove_member_cli, ["--parent-name", TEST_PARENT_GROUP, + "--user-name", TEST_USER]) + group_api_mock.remove_member.assert_called_once_with( + parent_name=TEST_PARENT_GROUP, + member_type='user', + member_name=TEST_USER + ) + + +@provide_conf +def test_remove_group_from_parent_cli(group_api_mock): + runner = CliRunner() + runner.invoke(cli.remove_member_cli, ["--parent-name", TEST_PARENT_GROUP, + "--group-name", TEST_GROUP]) + group_api_mock.remove_member.assert_called_once_with( + parent_name=TEST_PARENT_GROUP, + member_type='group', + member_name=TEST_GROUP + ) + + +@provide_conf +def test_delete_cli(group_api_mock): + runner = CliRunner() + runner.invoke(cli.delete_cli, ["--group-name", TEST_GROUP]) + group_api_mock.delete.assert_called_once_with(TEST_GROUP) From 7b4ad8bd2e1a69de87c85b3c174676e3bcc76998 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Fri, 3 Aug 2018 15:40:11 -0700 Subject: [PATCH 10/17] switching more instances of deploy_config to deploy and download_config to download --- databricks_cli/stack/api.py | 4 ++-- tests/stack/test_api.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index 6512904a..29fe06ac 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -250,12 +250,12 @@ def _deploy_job(self, resource_properties, physical_id=None): job_settings = resource_properties # resource_properties of jobs are solely job settings. if physical_id: - job_id = physical_id.get(JOBS_RESOURCE_ID) + job_id = physical_id.get(JOBS_RESOURCE_JOB_ID) self._update_job(job_settings, job_id) else: job_id = self._put_job(job_settings) click.echo("Job deployed on Databricks with Job ID {}".format(job_id)) - physical_id = {JOBS_RESOURCE_ID: job_id} + physical_id = {JOBS_RESOURCE_JOB_ID: job_id} deploy_output = self.jobs_client.get_job(job_id) return physical_id, deploy_output diff --git a/tests/stack/test_api.py b/tests/stack/test_api.py index 616d2022..f1485826 100644 --- a/tests/stack/test_api.py +++ b/tests/stack/test_api.py @@ -552,7 +552,7 @@ def test_deploy_valid_stack_status(self, stack_api, tmpdir): # Run deploy command on stack and then verify stack status. Validation for the stack # status is done within the deploy command, so if the function doesn't error, then # the generated stack is valid. - new_stack_status_1 = stack_api.deploy_config(test_stack) + new_stack_status_1 = stack_api.deploy(test_stack) # Asserting some correctness criteria for the generated stack status. 
assert new_stack_status_1.get(api.STACK_RESOURCES) == test_stack.get(api.STACK_RESOURCES) for deployed_resource in new_stack_status_1.get(api.STACK_DEPLOYED): @@ -561,7 +561,7 @@ def test_deploy_valid_stack_status(self, stack_api, tmpdir): # Test that when passing in a status that the new status generated will still be valid. new_stack_status_2 = stack_api.deploy(test_stack, stack_status=TEST_STATUS) - # Test some extra correctness criteria for the generated stack status. + # Asserting some correctness criteria for the generated stack status. assert new_stack_status_2.get(api.STACK_RESOURCES) == test_stack.get(api.STACK_RESOURCES) for deployed_resource in new_stack_status_2.get(api.STACK_DEPLOYED): assert deployed_resource.get(api.RESOURCE_DEPLOY_OUTPUT) == test_deploy_output @@ -574,7 +574,7 @@ def test_download(self, stack_api): stack_api._download_workspace = mock.MagicMock() stack_api._download_resource = mock.Mock(wraps=stack_api._download_resource) - stack_api.download_from_config(TEST_STACK) + stack_api.download(TEST_STACK) # because there is only two workspace resources, _download assert stack_api._download_resource.call_count == 5 assert stack_api._download_workspace.call_count == 2 From aa58b296b9d0509691dc18f84ba2f1e6ddf68443 Mon Sep 17 00:00:00 2001 From: Aaron Davidson Date: Mon, 6 Aug 2018 14:03:11 -0700 Subject: [PATCH 11/17] Enable Python clients to set an overriding DatabricksConfigProvider (#158) --- databricks_cli/click_types.py | 4 - databricks_cli/configure/cli.py | 6 +- databricks_cli/configure/config.py | 13 ++- databricks_cli/configure/provider.py | 151 +++++++++++++++++++++++---- databricks_cli/utils.py | 22 ++-- tests/configure/test_cli.py | 28 ++--- tests/configure/test_provider.py | 88 ++++++++++++---- 7 files changed, 234 insertions(+), 78 deletions(-) diff --git a/databricks_cli/click_types.py b/databricks_cli/click_types.py index c7a37357..797fef59 100644 --- a/databricks_cli/click_types.py +++ b/databricks_cli/click_types.py @@ -23,8 +23,6 @@ from click import ParamType, Option, MissingParameter, UsageError -from databricks_cli.configure.provider import DEFAULT_SECTION - class OutputClickType(ParamType): name = 'FORMAT' @@ -117,6 +115,4 @@ def set_profile(self, profile): self._profile = profile def get_profile(self): - if self._profile is None: - return DEFAULT_SECTION return self._profile diff --git a/databricks_cli/configure/cli.py b/databricks_cli/configure/cli.py index 396ee877..5242b63b 100644 --- a/databricks_cli/configure/cli.py +++ b/databricks_cli/configure/cli.py @@ -26,7 +26,7 @@ from click import ParamType from databricks_cli.configure.provider import DatabricksConfig, update_and_persist_config, \ - get_config_for_profile + ProfileConfigProvider from databricks_cli.utils import CONTEXT_SETTINGS from databricks_cli.configure.config import profile_option, get_profile_from_context, debug_option @@ -37,7 +37,7 @@ def _configure_cli_token(profile, insecure): - config = get_config_for_profile(profile) + config = ProfileConfigProvider(profile).get_config() or DatabricksConfig.empty() host = click.prompt(PROMPT_HOST, default=config.host, type=_DbfsHost()) token = click.prompt(PROMPT_TOKEN, default=config.token) new_config = DatabricksConfig.from_token(host, token, insecure) @@ -45,7 +45,7 @@ def _configure_cli_token(profile, insecure): def _configure_cli_password(profile, insecure): - config = get_config_for_profile(profile) + config = ProfileConfigProvider(profile).get_config() or DatabricksConfig.empty() if config.password: default_password = '*' * 
len(config.password) else: diff --git a/databricks_cli/configure/config.py b/databricks_cli/configure/config.py index cb32ef63..f6ca04e4 100644 --- a/databricks_cli/configure/config.py +++ b/databricks_cli/configure/config.py @@ -26,7 +26,7 @@ import six from databricks_cli.click_types import ContextObject -from databricks_cli.configure.provider import get_config_for_profile +from databricks_cli.configure.provider import get_config, ProfileConfigProvider from databricks_cli.utils import InvalidConfigurationError from databricks_cli.sdk import ApiClient @@ -42,9 +42,14 @@ def decorator(*args, **kwargs): command_name = "-".join(ctx.command_path.split(" ")[1:]) command_name += "-" + str(uuid.uuid1()) profile = get_profile_from_context() - config = get_config_for_profile(profile) - if not config.is_valid: - raise InvalidConfigurationError(profile) + if profile: + # If we request a specific profile, only get credentials from tere. + config = ProfileConfigProvider(profile).get_config() + else: + # If unspecified, use the default provider, or allow for user overrides. + config = get_config() + if not config or not config.is_valid: + raise InvalidConfigurationError.for_profile(profile) kwargs['api_client'] = _get_api_client(config, command_name) return function(*args, **kwargs) diff --git a/databricks_cli/configure/provider.py b/databricks_cli/configure/provider.py index b8894b45..3dfe4dd4 100644 --- a/databricks_cli/configure/provider.py +++ b/databricks_cli/configure/provider.py @@ -20,10 +20,14 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +from abc import abstractmethod, ABCMeta from configparser import ConfigParser +import os from os.path import expanduser, join -import os +from databricks_cli.utils import InvalidConfigurationError + _home = expanduser('~') HOST = 'host' @@ -33,6 +37,9 @@ INSECURE = 'insecure' DEFAULT_SECTION = 'DEFAULT' +# User-provided override for the DatabricksConfigProvider +_config_provider = None + def _get_path(): return join(_home, '.databrickscfg') @@ -81,6 +88,7 @@ def update_and_persist_config(profile, databricks_config): same profile. :param databricks_config: DatabricksConfig """ + profile = profile if profile else DEFAULT_SECTION raw_config = _fetch_from_fs() _create_section_if_absent(raw_config, profile) _set_option(raw_config, profile, HOST, databricks_config.host) @@ -91,35 +99,136 @@ def update_and_persist_config(profile, databricks_config): _overwrite_config(raw_config) +def get_config(): + """ + Returns a DatabricksConfig containing the hostname and authentication used to talk to + the Databricks API. By default, we leverage the DefaultConfigProvider to get + this config, but this behavior may be overridden by calling 'set_config_provider' + + If no DatabricksConfig can be found, an InvalidConfigurationError will be raised. + """ + global _config_provider + if _config_provider: + config = _config_provider.get_config() + if config: + return config + raise InvalidConfigurationError( + 'Custom provider returned no DatabricksConfig: %s' % _config_provider) + + config = DefaultConfigProvider().get_config() + if config: + return config + raise InvalidConfigurationError.for_profile(None) + + def get_config_for_profile(profile): """ - Reads from the filesystem and gets a DatabricksConfig for the specified profile. If it does not - exist, then return a DatabricksConfig with fields set. 
+ [Deprecated] Reads from the filesystem and gets a DatabricksConfig for the + specified profile. If it does not exist, then return a DatabricksConfig with fields set + to None. + + Internal callers should prefer get_config() to use user-specified overrides, and + to return appropriate error messages as opposited to invalid configurations. + + If you want to read from a specific profile, please instead use + 'ProfileConfigProvider(profile).get_config()'. + + This method is maintained for backwards-compatibility. It may be removed in future versions. + :return: DatabricksConfig """ - if is_environment_set(): + profile = profile if profile else DEFAULT_SECTION + config = EnvironmentVariableConfigProvider().get_config() + if config and config.is_valid: + return config + + config = ProfileConfigProvider(profile).get_config() + if config: + return config + return DatabricksConfig(None, None, None, None, None) + + +def set_config_provider(provider): + """ + Sets a DatabricksConfigProvider that will be used for all future calls to get_config(), + used by the Databricks CLI code to discover the user's credentials. + """ + global _config_provider + if provider and not isinstance(provider, DatabrickConfigProvider): + raise Exception('Must be instance of DatabrickConfigProvider: %s' % _config_provider) + _config_provider = provider + + +def get_config_provider(): + """ + Returns the current DatabricksConfigProvider. + If None, the DefaultConfigProvider will be used. + """ + global _config_provider + return _config_provider + + +class DatabrickConfigProvider(object): + """ + Responsible for providing hostname and authentication information to make + API requests against the Databricks REST API. + This method should generally return None if it cannot provide credentials, in order + to facilitate chanining of providers. 
+ """ + + __metaclass__ = ABCMeta + + @abstractmethod + def get_config(self): + pass + + +class DefaultConfigProvider(DatabrickConfigProvider): + """Prefers environment variables, and then the default profile.""" + def __init__(self): + self.env_provider = EnvironmentVariableConfigProvider() + self.default_profile_provider = ProfileConfigProvider() + + def get_config(self): + env_config = self.env_provider.get_config() + if env_config: + return env_config + + profile_config = self.default_profile_provider.get_config() + if profile_config: + return profile_config + + +class EnvironmentVariableConfigProvider(DatabrickConfigProvider): + """Loads from system environment variables.""" + def get_config(self): host = os.environ.get('DATABRICKS_HOST') username = os.environ.get('DATABRICKS_USERNAME') password = os.environ.get('DATABRICKS_PASSWORD') token = os.environ.get('DATABRICKS_TOKEN') insecure = os.environ.get('DATABRICKS_INSECURE') - return DatabricksConfig(host, username, password, token, insecure) - raw_config = _fetch_from_fs() - host = _get_option_if_exists(raw_config, profile, HOST) - username = _get_option_if_exists(raw_config, profile, USERNAME) - password = _get_option_if_exists(raw_config, profile, PASSWORD) - token = _get_option_if_exists(raw_config, profile, TOKEN) - insecure = _get_option_if_exists(raw_config, profile, INSECURE) - return DatabricksConfig(host, username, password, token, insecure) + config = DatabricksConfig(host, username, password, token, insecure) + if config.is_valid: + return config + return None + +class ProfileConfigProvider(DatabrickConfigProvider): + """Loads from the databrickscfg file.""" + def __init__(self, profile=DEFAULT_SECTION): + self.profile = profile -def is_environment_set(): - token_exists = (os.environ.get('DATABRICKS_HOST') - and os.environ.get('DATABRICKS_TOKEN')) - password_exists = (os.environ.get('DATABRICKS_HOST') - and os.environ.get('DATABRICKS_USERNAME') - and os.environ.get('DATABRICKS_PASSWORD')) - return token_exists or password_exists + def get_config(self): + raw_config = _fetch_from_fs() + host = _get_option_if_exists(raw_config, self.profile, HOST) + username = _get_option_if_exists(raw_config, self.profile, USERNAME) + password = _get_option_if_exists(raw_config, self.profile, PASSWORD) + token = _get_option_if_exists(raw_config, self.profile, TOKEN) + insecure = _get_option_if_exists(raw_config, self.profile, INSECURE) + config = DatabricksConfig(host, username, password, token, insecure) + if config.is_valid: + return config + return None class DatabricksConfig(object): @@ -138,6 +247,10 @@ def from_token(cls, host, token, insecure=None): def from_password(cls, host, username, password, insecure=None): return DatabricksConfig(host, username, password, None, insecure) + @classmethod + def empty(cls): + return DatabricksConfig(None, None, None, None, None) + @property def is_valid_with_token(self): return self.host is not None and self.token is not None diff --git a/databricks_cli/utils.py b/databricks_cli/utils.py index 76a41668..696e5cc5 100644 --- a/databricks_cli/utils.py +++ b/databricks_cli/utils.py @@ -29,7 +29,6 @@ import six from requests.exceptions import HTTPError -from databricks_cli.configure.provider import DEFAULT_SECTION from databricks_cli.click_types import ContextObject CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help']) @@ -88,13 +87,14 @@ def truncate_string(s, length=100): class InvalidConfigurationError(RuntimeError): - def __init__(self, profile=DEFAULT_SECTION): - if profile == 
DEFAULT_SECTION: - message = ('You haven\'t configured the CLI yet! ' - 'Please configure by entering `{} configure`'.format(sys.argv[0])) - else: - message = ('You haven\'t configured the CLI yet for the profile {profile}! ' - 'Please configure by entering ' - '`{argv} configure --profile {profile}`').format( - profile=profile, argv=sys.argv[0]) - super(InvalidConfigurationError, self).__init__(message) + @staticmethod + def for_profile(profile): + if profile is None: + return InvalidConfigurationError( + 'You haven\'t configured the CLI yet! ' + 'Please configure by entering `{} configure`'.format(sys.argv[0])) + return InvalidConfigurationError( + ('You haven\'t configured the CLI yet for the profile {profile}! ' + 'Please configure by entering ' + '`{argv} configure --profile {profile}`').format( + profile=profile, argv=sys.argv[0])) diff --git a/tests/configure/test_cli.py b/tests/configure/test_cli.py index 50c02241..bf74ac16 100644 --- a/tests/configure/test_cli.py +++ b/tests/configure/test_cli.py @@ -26,7 +26,7 @@ from click.testing import CliRunner import databricks_cli.configure.cli as cli -from databricks_cli.configure.provider import get_config_for_profile, DEFAULT_SECTION +from databricks_cli.configure.provider import get_config, ProfileConfigProvider TEST_HOST = 'https://test.cloud.databricks.com' TEST_USER = 'monkey@databricks.com' @@ -44,18 +44,18 @@ def test_configure_cli(): TEST_USER + '\n' + TEST_PASSWORD + '\n' + TEST_PASSWORD + '\n')) - assert get_config_for_profile(DEFAULT_SECTION).host == TEST_HOST - assert get_config_for_profile(DEFAULT_SECTION).username == TEST_USER - assert get_config_for_profile(DEFAULT_SECTION).password == TEST_PASSWORD + assert get_config().host == TEST_HOST + assert get_config().username == TEST_USER + assert get_config().password == TEST_PASSWORD def test_configure_cli_token(): runner = CliRunner() runner.invoke(cli.configure_cli, ['--token'], input=(TEST_HOST + '\n' + TEST_TOKEN + '\n')) - assert get_config_for_profile(DEFAULT_SECTION).host == TEST_HOST - assert get_config_for_profile(DEFAULT_SECTION).token == TEST_TOKEN - assert get_config_for_profile(DEFAULT_SECTION).insecure is None + assert get_config().host == TEST_HOST + assert get_config().token == TEST_TOKEN + assert get_config().insecure is None def test_configure_two_sections(): @@ -64,16 +64,16 @@ def test_configure_two_sections(): input=(TEST_HOST + '\n' + TEST_TOKEN + '\n')) runner.invoke(cli.configure_cli, ['--token', '--profile', TEST_PROFILE], input=(TEST_HOST_2 + '\n' + TEST_TOKEN + '\n')) - assert get_config_for_profile(DEFAULT_SECTION).host == TEST_HOST - assert get_config_for_profile(DEFAULT_SECTION).token == TEST_TOKEN - assert get_config_for_profile(TEST_PROFILE).host == TEST_HOST_2 - assert get_config_for_profile(TEST_PROFILE).token == TEST_TOKEN + assert get_config().host == TEST_HOST + assert get_config().token == TEST_TOKEN + assert ProfileConfigProvider(TEST_PROFILE).get_config().host == TEST_HOST_2 + assert ProfileConfigProvider(TEST_PROFILE).get_config().token == TEST_TOKEN def test_configure_cli_insecure(): runner = CliRunner() runner.invoke(cli.configure_cli, ['--token', '--insecure'], input=(TEST_HOST + '\n' + TEST_TOKEN + '\n')) - assert get_config_for_profile(DEFAULT_SECTION).host == TEST_HOST - assert get_config_for_profile(DEFAULT_SECTION).token == TEST_TOKEN - assert get_config_for_profile(DEFAULT_SECTION).insecure == 'True' + assert get_config().host == TEST_HOST + assert get_config().token == TEST_TOKEN + assert get_config().insecure == 'True' diff 
--git a/tests/configure/test_provider.py b/tests/configure/test_provider.py index e227df45..31de1fdb 100644 --- a/tests/configure/test_provider.py +++ b/tests/configure/test_provider.py @@ -20,9 +20,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + +import os +import pytest + from mock import patch from databricks_cli.configure.provider import DatabricksConfig, DEFAULT_SECTION, \ - update_and_persist_config, get_config_for_profile + update_and_persist_config, get_config_for_profile, get_config, \ + set_config_provider, ProfileConfigProvider, _get_path, DatabrickConfigProvider +from databricks_cli.utils import InvalidConfigurationError TEST_HOST = 'https://test.cloud.databricks.com' @@ -117,30 +123,66 @@ def test_get_config_if_password_environment_set(): assert config.password == TEST_PASSWORD -class TestDatabricksConfig(object): - def test_from_token(self): - config = DatabricksConfig.from_token(TEST_HOST, TEST_TOKEN) - assert config.host == TEST_HOST - assert config.token == TEST_TOKEN +def test_get_config_uses_default_profile(): + config = DatabricksConfig.from_token("hosty", "hello") + update_and_persist_config(DEFAULT_SECTION, config) + config = get_config() + assert config.is_valid_with_token + assert config.host == "hosty" + assert config.token == "hello" - def test_from_password(self): - config = DatabricksConfig.from_password(TEST_HOST, TEST_USER, TEST_PASSWORD) + +def test_get_config_uses_env_variable(): + with patch.dict('os.environ', {'DATABRICKS_HOST': TEST_HOST, + 'DATABRICKS_USERNAME': TEST_USER, + 'DATABRICKS_PASSWORD': TEST_PASSWORD}): + config = get_config() assert config.host == TEST_HOST assert config.username == TEST_USER assert config.password == TEST_PASSWORD - def test_is_valid_with_token(self): - config = DatabricksConfig.from_token(TEST_HOST, TEST_TOKEN) - assert not config.is_valid_with_password - assert config.is_valid_with_token - - def test_is_valid_with_password(self): - config = DatabricksConfig.from_password(TEST_HOST, TEST_USER, TEST_PASSWORD) - assert config.is_valid_with_password - assert not config.is_valid_with_token - - def test_is_valid(self): - config = DatabricksConfig.from_password(TEST_HOST, TEST_USER, TEST_PASSWORD) - assert config.is_valid - config = DatabricksConfig.from_token(TEST_HOST, TEST_TOKEN) - assert config.is_valid + +def test_get_config_throw_exception_if_profile_invalid(): + invalid_config = DatabricksConfig.from_token(None, None) + update_and_persist_config(DEFAULT_SECTION, invalid_config) + with pytest.raises(InvalidConfigurationError): + get_config() + + +def test_get_config_throw_exception_if_profile_absent(): + assert not os.path.exists(_get_path()) + with pytest.raises(InvalidConfigurationError): + get_config() + + +def test_get_config_override_profile(): + config = DatabricksConfig.from_token("yo", "lo") + update_and_persist_config(TEST_PROFILE, config) + try: + provider = ProfileConfigProvider(TEST_PROFILE) + set_config_provider(provider) + config = get_config() + assert config.host == "yo" + assert config.token == "lo" + finally: + set_config_provider(None) + + +def test_get_config_override_custom(): + class TestConfigProvider(DatabrickConfigProvider): + def get_config(self): + return DatabricksConfig.from_token("Override", "Token!") + + try: + provider = TestConfigProvider() + set_config_provider(provider) + config = get_config() + assert config.host == "Override" + assert config.token == 
"Token!" + finally: + set_config_provider(None) + + +def test_get_config_bad_override(): + with pytest.raises(Exception): + set_config_provider("NotAConfigProvider") From ff42f1d21d25ddb8f373396647568b43ff9c8912 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 6 Aug 2018 14:15:36 -0700 Subject: [PATCH 12/17] fix test_api add some comments --- databricks_cli/stack/api.py | 7 +++++-- tests/stack/test_api.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index 29fe06ac..cd4fe0f3 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -91,8 +91,11 @@ def deploy(self, stack_config, stack_status=None, **kwargs): :param stack_config: Must have the fields of 'name', the name of the stack and 'resources', a list of stack resources. - :param stack_status: Must have the fields of - :return: + :param stack_status: Must have the fields of 'name', the name of the stack, 'resources', + a list of stack resources, and 'deployed', a list of resource statuses from a previous + deployment. + :return: new_stack_status: The new stack status generated from the deployment of + the given stack_config. """ click.echo('#' * 80) self._validate_config(stack_config) diff --git a/tests/stack/test_api.py b/tests/stack/test_api.py index c1f0b30d..a19c450f 100644 --- a/tests/stack/test_api.py +++ b/tests/stack/test_api.py @@ -498,7 +498,7 @@ def test_download_resource(self, stack_api): } stack_api._download_resource(resource_badservice) - def test_deploy_valid_stack_status(self, stack_api, tmpdir): + def test_deploy_config(self, stack_api, tmpdir): """ The stack status generated from a correctly set up stack passed through deployment in stack_api should pass the validation assertions within the deployment procedure From e45791fb0f3dcaca6a4a693131ac0cf6cba1c0a9 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 6 Aug 2018 14:16:10 -0700 Subject: [PATCH 13/17] Fix merge error --- tests/stack/test_api.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/stack/test_api.py b/tests/stack/test_api.py index 66bea973..a19c450f 100644 --- a/tests/stack/test_api.py +++ b/tests/stack/test_api.py @@ -557,11 +557,7 @@ def test_deploy_config(self, stack_api, tmpdir): # All functions to pull the deployed output were mocked to return deploy_output assert deployed_resource.get(api.RESOURCE_DEPLOY_OUTPUT) == test_deploy_output -<<<<<<< HEAD # stack_api.deploy should create a valid stack status when given an existing -======= - # stack_api.deploy_config should create a valid stack status when given an existing ->>>>>>> stack-validation # stack_status new_stack_status_2 = stack_api.deploy(test_stack, stack_status=TEST_STATUS) assert new_stack_status_2.get(api.STACK_RESOURCES) == test_stack.get(api.STACK_RESOURCES) From a7a150d8754dbbc121643357ef07055fbd36c711 Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 6 Aug 2018 15:47:03 -0700 Subject: [PATCH 14/17] Stack CLI: Service-specific validation for resources in stack status and stack config. 
(#154) * added stack status and stack config validation for all resource types along with end-to-end test * little change in comment * Added filesystem configuration for end-to-end test * moved import copy * remove instances of using 'NOTEBOOK' and 'DIRECTORY' * Replaced all magic string fields with global variables * Changed test names/descriptions along with add more cases to validate outputted stack status from deploy_config * modularized assertions for validation * added more #*80 outputs and better generalization and modularity to validate functions * changed comment of test * deleted stack_download test, changes in comments * Changed test name --- databricks_cli/stack/api.py | 164 +++++++++++++++-------- tests/stack/test_api.py | 259 ++++++++++++++++++++++++++---------- 2 files changed, 292 insertions(+), 131 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index 01fb5836..ca90b6d2 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -59,6 +59,20 @@ RESOURCE_DEPLOY_TIMESTAMP = 'timestamp' CLI_VERSION_KEY = 'cli_version' +# Job Service Properties +JOBS_RESOURCE_NAME = 'name' +JOBS_RESOURCE_JOB_ID = 'job_id' + +# Workspace Service Properties +WORKSPACE_RESOURCE_SOURCE_PATH = 'source_path' +WORKSPACE_RESOURCE_PATH = 'path' +WORKSPACE_RESOURCE_OBJECT_TYPE = 'object_type' + +# DBFS Service Properties +DBFS_RESOURCE_SOURCE_PATH = 'source_path' +DBFS_RESOURCE_PATH = 'path' +DBFS_RESOURCE_IS_DIR = 'is_dir' + class StackApi(object): def __init__(self, api_client): @@ -124,14 +138,17 @@ def deploy_config(self, stack_config, stack_status=None, **kwargs): :param stack_status: Must have the fields of :return: """ + click.echo('#' * 80) self._validate_config(stack_config) if stack_status: + click.echo('#' * 80) self._validate_status(stack_status) resource_id_to_status = self._get_resource_to_status_map(stack_status) else: resource_id_to_status = {} stack_name = stack_config.get(STACK_NAME) + click.echo('#' * 80) click.echo('Deploying stack {}'.format(stack_name)) # List of statuses, One for each resource in stack_config[STACK_RESOURCES] @@ -147,7 +164,6 @@ def deploy_config(self, stack_config, stack_status=None, **kwargs): new_resource_status = self._deploy_resource(resource_config, resource_status, **kwargs) resource_statuses.append(new_resource_status) click.echo('#' * 80) - # stack deploy status is original config with deployed resource statuses added new_stack_status = copy.deepcopy(stack_config) new_stack_status.update({STACK_DEPLOYED: resource_statuses}) @@ -155,6 +171,7 @@ def deploy_config(self, stack_config, stack_status=None, **kwargs): # Validate that the status has been created correctly self._validate_status(new_stack_status) + click.echo('#' * 80) return new_stack_status @@ -277,12 +294,12 @@ def _deploy_job(self, resource_properties, physical_id=None): job_settings = resource_properties # resource_properties of jobs are solely job settings. if physical_id: - job_id = physical_id.get('job_id') + job_id = physical_id.get(JOBS_RESOURCE_JOB_ID) self._update_job(job_settings, job_id) else: job_id = self._put_job(job_settings) click.echo("Job deployed on Databricks with Job ID {}".format(job_id)) - physical_id = {'job_id': job_id} + physical_id = {JOBS_RESOURCE_JOB_ID: job_id} deploy_output = self.jobs_client.get_job(job_id) return physical_id, deploy_output @@ -296,9 +313,7 @@ def _put_job(self, job_settings): :param job_settings: :return: job_id, Physical ID of job on Databricks server. 
""" - if 'name' not in job_settings: - raise StackError("Please supply 'name' in job resource 'properties'") - job_name = job_settings.get('name') + job_name = job_settings.get(JOBS_RESOURCE_NAME) jobs_same_name = self.jobs_client._list_jobs_by_name(job_name) if len(jobs_same_name) > 1: raise StackError("Multiple jobs with the same name '{}' already exist, aborting" @@ -342,15 +357,16 @@ def _deploy_workspace(self, resource_properties, physical_id, overwrite): deploy_output is the initial information about the asset on databricks at deploy time returned by the REST API. """ - # Required fields. TODO(alinxie) put in _validate_config - local_path = resource_properties.get('source_path') - workspace_path = resource_properties.get('path') - object_type = resource_properties.get('object_type') + local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH) + workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH) + object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE) actual_object_type = DIRECTORY if os.path.isdir(local_path) else NOTEBOOK if object_type != actual_object_type: - raise StackError("Field 'object_type' ({}) not consistent" - "with actual object type ({})".format(object_type, actual_object_type)) + raise StackError("Field '{}' ({}) not consistent" + "with actual object type ({})".format(WORKSPACE_RESOURCE_OBJECT_TYPE, + object_type, + actual_object_type)) click.echo('Uploading {} from {} to Databricks workspace at {}'.format(object_type, local_path, @@ -373,11 +389,11 @@ def _deploy_workspace(self, resource_properties, physical_id, overwrite): # Shouldn't reach here because of verification of object_type above. assert False - if physical_id and physical_id['path'] != workspace_path: + if physical_id and physical_id[WORKSPACE_RESOURCE_PATH] != workspace_path: # physical_id['path'] is the workspace path from the last deployment. Alert when changed - click.echo("Workspace asset had path changed from {} to {}".format(physical_id['path'], - workspace_path)) - new_physical_id = {'path': workspace_path} + click.echo("Workspace asset had path changed from {} to {}" + .format(physical_id[WORKSPACE_RESOURCE_PATH], workspace_path)) + new_physical_id = {WORKSPACE_RESOURCE_PATH: workspace_path} deploy_output = self.workspace_client.client.get_status(workspace_path) return new_physical_id, deploy_output @@ -390,10 +406,9 @@ def _download_workspace(self, resource_properties, overwrite): 'source_path', 'path' and 'object_type' fields. :param overwrite: Whether or not to overwrite the contents of workspace notebooks. """ - # Required fields. 
TODO(alinxie) put in _validate_config - local_path = resource_properties.get('source_path') - workspace_path = resource_properties.get('path') - object_type = resource_properties.get('object_type') + local_path = resource_properties.get(WORKSPACE_RESOURCE_SOURCE_PATH) + workspace_path = resource_properties.get(WORKSPACE_RESOURCE_PATH) + object_type = resource_properties.get(WORKSPACE_RESOURCE_OBJECT_TYPE) click.echo('Downloading {} from Databricks path {} to {}'.format(object_type, workspace_path, local_path)) @@ -411,7 +426,8 @@ def _download_workspace(self, resource_properties, overwrite): elif object_type == DIRECTORY: self.workspace_client.export_workspace_dir(workspace_path, local_path, overwrite) else: - raise StackError("Invalid value for 'object_type' field: {}".format(object_type)) + raise StackError("Invalid value for '{}' field: {}" + .format(WORKSPACE_RESOURCE_OBJECT_TYPE, object_type)) def _deploy_dbfs(self, resource_properties, physical_id, overwrite): """ @@ -428,10 +444,10 @@ def _deploy_dbfs(self, resource_properties, physical_id, overwrite): deploy_output is the initial information about the dbfs asset at deploy time returned by the REST API. """ - # Required fields. TODO(alinxie) validate fields in _validate_config - local_path = resource_properties.get('source_path') - dbfs_path = resource_properties.get('path') - is_dir = resource_properties.get('is_dir') + + local_path = resource_properties.get(DBFS_RESOURCE_SOURCE_PATH) + dbfs_path = resource_properties.get(DBFS_RESOURCE_PATH) + is_dir = resource_properties.get(DBFS_RESOURCE_IS_DIR) if is_dir != os.path.isdir(local_path): dir_or_file = 'directory' if os.path.isdir(local_path) else 'file' @@ -445,11 +461,11 @@ def _deploy_dbfs(self, resource_properties, physical_id, overwrite): click.echo('Uploading file from {} to DBFS at {}'.format(local_path, dbfs_path)) self.dbfs_client.cp(recursive=False, overwrite=overwrite, src=local_path, dst=dbfs_path) - if physical_id and physical_id['path'] != dbfs_path: + if physical_id and physical_id[DBFS_RESOURCE_PATH] != dbfs_path: # physical_id['path'] is the dbfs path from the last deployment. Alert when changed - click.echo("Dbfs asset had path changed from {} to {}".format(physical_id['path'], - dbfs_path)) - new_physical_id = {'path': dbfs_path} + click.echo("Dbfs asset had path changed from {} to {}" + .format(physical_id[DBFS_RESOURCE_PATH], dbfs_path)) + new_physical_id = {DBFS_RESOURCE_PATH: dbfs_path} deploy_output = self.dbfs_client.client.get_status(dbfs_path) return new_physical_id, deploy_output @@ -459,29 +475,46 @@ def _validate_config(self, stack_config): Validate fields within a stack configuration. This ensures that an inputted configuration has the necessary fields for stack deployment to function well. - TODO(alinxie): Add validation for separate resource services and their properties. :param stack_config: dict- stack config that is inputted by the user. :return: None. Raises errors to stop deployment if there is a problem. """ - if STACK_NAME not in stack_config: - raise StackError("'{}' not in configuration".format(STACK_NAME)) - if STACK_RESOURCES not in stack_config: - raise StackError("'{}' not in configuration".format(STACK_RESOURCES)) + click.echo('Validating fields in stack configuration...') + self._assert_fields_in_dict([STACK_NAME, STACK_RESOURCES], stack_config) + seen_resource_ids = set() # Store seen resources to restrict duplicates. 
for resource in stack_config.get(STACK_RESOURCES): - if RESOURCE_ID not in resource: - raise StackError("{} doesn't exist in resource config".format(RESOURCE_ID)) - if RESOURCE_SERVICE not in resource: - raise StackError("{} doesn't exist in resource config".format(RESOURCE_SERVICE)) - if RESOURCE_PROPERTIES not in resource: - raise StackError("{} doesn't exist in resource config".format(RESOURCE_PROPERTIES)) - # Error on duplicate resource ID's + # Get validate resource ID exists, then get it. + self._assert_fields_in_dict([RESOURCE_ID], resource) resource_id = resource.get(RESOURCE_ID) + + click.echo('Validating fields in resource with ID "{}"'.format(resource_id)) + self._assert_fields_in_dict([RESOURCE_SERVICE, RESOURCE_PROPERTIES], resource) + + resource_service = resource.get(RESOURCE_SERVICE) + resource_properties = resource.get(RESOURCE_PROPERTIES) + + # Error on duplicate resource ID's if resource_id in seen_resource_ids: - raise StackError("Duplicate resource ID '{}' found, please resolve.".format( + raise StackError('Duplicate resource ID "{}" found, please resolve.'.format( resource_id)) seen_resource_ids.add(resource_id) + # Resource service-specific validations + click.echo('Validating fields in "{}" of {} resource.' + .format(RESOURCE_PROPERTIES, resource_service)) + if resource_service == JOBS_SERVICE: + self._assert_fields_in_dict([JOBS_RESOURCE_NAME], resource_properties) + elif resource_service == WORKSPACE_SERVICE: + self._assert_fields_in_dict( + [WORKSPACE_RESOURCE_PATH, WORKSPACE_RESOURCE_SOURCE_PATH, + WORKSPACE_RESOURCE_OBJECT_TYPE], resource_properties) + elif resource_service == DBFS_SERVICE: + self._assert_fields_in_dict( + [DBFS_RESOURCE_PATH, DBFS_RESOURCE_SOURCE_PATH, + DBFS_RESOURCE_IS_DIR], resource_properties) + else: + raise StackError("Resource service '{}' not supported".format(resource_service)) + def _validate_status(self, stack_status): """ Validate fields within a stack status. This ensures that a stack status has the @@ -489,26 +522,39 @@ def _validate_status(self, stack_status): If there is an error here, then it is either an implementation error that must be fixed by a developer or the User edited the stack status file created by the program. - TODO(alinxie): Add validation for separate resource services and their physical id's. + :param stack_status: dict- stack status that is created by the program. :return: None. Raises errors to stop deployment if there is a problem. 
""" - if STACK_NAME not in stack_status: - raise StackError("'{}' not in status.".format(STACK_NAME)) - if STACK_RESOURCES not in stack_status: - raise StackError("'{}' not in status".format(STACK_RESOURCES)) - if STACK_DEPLOYED not in stack_status: - raise StackError("'{}' not in status".format(STACK_DEPLOYED)) - for deployed_resource in stack_status.get(STACK_DEPLOYED): - if RESOURCE_ID not in deployed_resource: - raise StackError("{} doesn't exist in deployed resource status".format( - RESOURCE_ID)) - if RESOURCE_SERVICE not in deployed_resource: - raise StackError("{} doesn't exist in deployed resource status".format( - RESOURCE_SERVICE)) - if RESOURCE_PHYSICAL_ID not in deployed_resource: - raise StackError("{} doesn't exist in deployed resource status".format( - RESOURCE_PHYSICAL_ID)) + click.echo('Validating fields in stack status...') + self._assert_fields_in_dict([STACK_NAME, STACK_RESOURCES, STACK_DEPLOYED], stack_status) + + for resource_status in stack_status.get(STACK_DEPLOYED): + self._assert_fields_in_dict([RESOURCE_ID], resource_status) + resource_id = resource_status.get(RESOURCE_ID) + click.echo('Validating fields in resource status of resource with ID "{}"' + .format(resource_id)) + self._assert_fields_in_dict([RESOURCE_SERVICE, RESOURCE_PHYSICAL_ID, + RESOURCE_DEPLOY_OUTPUT], resource_status) + + resource_service = resource_status.get(RESOURCE_SERVICE) + resource_physical_id = resource_status.get(RESOURCE_PHYSICAL_ID) + + click.echo('Validating fields in "{}" of {} resource status' + .format(RESOURCE_PHYSICAL_ID, resource_service)) + if resource_service == JOBS_SERVICE: + self._assert_fields_in_dict([JOBS_RESOURCE_JOB_ID], resource_physical_id) + elif resource_service == WORKSPACE_SERVICE: + self._assert_fields_in_dict([WORKSPACE_RESOURCE_PATH], resource_physical_id) + elif resource_service == DBFS_SERVICE: + self._assert_fields_in_dict([DBFS_RESOURCE_PATH], resource_physical_id) + else: + raise StackError("{} not a valid resource status service".format(resource_service)) + + def _assert_fields_in_dict(self, fields, dictionary): + for field in fields: + if field not in dictionary: + raise StackError('Required field "{}" not found'.format(field)) def _get_resource_to_status_map(self, stack_status): """ diff --git a/tests/stack/test_api.py b/tests/stack/test_api.py index 2c304dad..8712876c 100644 --- a/tests/stack/test_api.py +++ b/tests/stack/test_api.py @@ -27,17 +27,19 @@ import os import json +import copy import mock from requests.exceptions import HTTPError import pytest import databricks_cli.stack.api as api +import databricks_cli.workspace.api as workspace_api from databricks_cli.stack.exceptions import StackError TEST_STACK_PATH = 'stack/stack.json' TEST_JOB_SETTINGS = { - 'name': 'test job' + api.JOBS_RESOURCE_NAME: 'test job' } TEST_JOB_RESOURCE_ID = 'test job' TEST_JOB_RESOURCE = { @@ -45,29 +47,31 @@ api.RESOURCE_SERVICE: api.JOBS_SERVICE, api.RESOURCE_PROPERTIES: TEST_JOB_SETTINGS } -TEST_JOB_PHYSICAL_ID = {'job_id': 1234} +TEST_JOB_PHYSICAL_ID = {api.JOBS_RESOURCE_JOB_ID: 1234} TEST_WORKSPACE_NB_PROPERTIES = { - 'source_path': 'test/notebook.py', - 'path': '/test/notebook.py', - 'object_type': 'NOTEBOOK' + api.WORKSPACE_RESOURCE_SOURCE_PATH: 'test/notebook.py', + api.WORKSPACE_RESOURCE_PATH: '/test/notebook.py', + api.WORKSPACE_RESOURCE_OBJECT_TYPE: workspace_api.NOTEBOOK } TEST_WORKSPACE_DIR_PROPERTIES = { - 'source_path': 'test/dir', - 'path': '/test/dir', - 'object_type': 'DIRECTORY' + api.WORKSPACE_RESOURCE_SOURCE_PATH: 'test/workspace/dir', + 
api.WORKSPACE_RESOURCE_PATH: '/test/dir', + api.WORKSPACE_RESOURCE_OBJECT_TYPE: workspace_api.DIRECTORY } +TEST_WORKSPACE_NB_PHYSICAL_ID = {api.WORKSPACE_RESOURCE_PATH: '/test/notebook.py'} +TEST_WORKSPACE_DIR_PHYSICAL_ID = {api.WORKSPACE_RESOURCE_PATH: '/test/dir'} TEST_DBFS_FILE_PROPERTIES = { - 'source_path': 'test.jar', - 'path': 'dbfs:/test/test.jar', - 'is_dir': False + api.DBFS_RESOURCE_SOURCE_PATH: 'test.jar', + api.DBFS_RESOURCE_PATH: 'dbfs:/test/test.jar', + api.DBFS_RESOURCE_IS_DIR: False } TEST_DBFS_DIR_PROPERTIES = { - 'source_path': 'test/dir', - 'path': 'dbfs:/test/dir', - 'is_dir': True + api.DBFS_RESOURCE_SOURCE_PATH: 'test/dbfs/dir', + api.DBFS_RESOURCE_PATH: 'dbfs:/test/dir', + api.DBFS_RESOURCE_IS_DIR: True } -TEST_DBFS_FILE_PHYSICAL_ID = {'path': 'dbfs:/test/test.jar'} -TEST_DBFS_DIR_PHYSICAL_ID = {'path': 'dbfs:/test/dir'} +TEST_DBFS_FILE_PHYSICAL_ID = {api.DBFS_RESOURCE_PATH: 'dbfs:/test/test.jar'} +TEST_DBFS_DIR_PHYSICAL_ID = {api.DBFS_RESOURCE_PATH: 'dbfs:/test/dir'} TEST_RESOURCE_ID = 'test job' TEST_RESOURCE_WORKSPACE_NB_ID = 'test notebook' TEST_RESOURCE_WORKSPACE_DIR_ID = 'test directory' @@ -96,23 +100,40 @@ TEST_JOB_STATUS = { api.RESOURCE_ID: TEST_RESOURCE_ID, api.RESOURCE_SERVICE: api.JOBS_SERVICE, - api.RESOURCE_PHYSICAL_ID: TEST_JOB_PHYSICAL_ID + api.RESOURCE_PHYSICAL_ID: TEST_JOB_PHYSICAL_ID, + api.RESOURCE_DEPLOY_OUTPUT: {} +} +TEST_WORKSPACE_NB_STATUS = { + api.RESOURCE_ID: TEST_RESOURCE_WORKSPACE_NB_ID, + api.RESOURCE_SERVICE: api.WORKSPACE_SERVICE, + api.RESOURCE_PHYSICAL_ID: TEST_WORKSPACE_NB_PHYSICAL_ID, + api.RESOURCE_DEPLOY_OUTPUT: {} +} +TEST_WORKSPACE_DIR_STATUS = { + api.RESOURCE_ID: TEST_RESOURCE_WORKSPACE_DIR_ID, + api.RESOURCE_SERVICE: api.WORKSPACE_SERVICE, + api.RESOURCE_PHYSICAL_ID: TEST_WORKSPACE_DIR_PHYSICAL_ID, + api.RESOURCE_DEPLOY_OUTPUT: {} } TEST_DBFS_FILE_STATUS = { api.RESOURCE_ID: TEST_RESOURCE_DBFS_FILE_ID, api.RESOURCE_SERVICE: api.DBFS_SERVICE, - api.RESOURCE_PHYSICAL_ID: TEST_DBFS_FILE_PHYSICAL_ID + api.RESOURCE_PHYSICAL_ID: TEST_DBFS_FILE_PHYSICAL_ID, + api.RESOURCE_DEPLOY_OUTPUT: {} } TEST_DBFS_DIR_STATUS = { api.RESOURCE_ID: TEST_RESOURCE_DBFS_DIR_ID, api.RESOURCE_SERVICE: api.DBFS_SERVICE, - api.RESOURCE_PHYSICAL_ID: TEST_DBFS_DIR_PHYSICAL_ID + api.RESOURCE_PHYSICAL_ID: TEST_DBFS_DIR_PHYSICAL_ID, + api.RESOURCE_DEPLOY_OUTPUT: {} } TEST_STACK = { api.STACK_NAME: "test-stack", api.STACK_RESOURCES: [TEST_JOB_RESOURCE, TEST_WORKSPACE_NB_RESOURCE, - TEST_WORKSPACE_DIR_RESOURCE] + TEST_WORKSPACE_DIR_RESOURCE, + TEST_DBFS_FILE_RESOURCE, + TEST_DBFS_DIR_RESOURCE] } TEST_STATUS = { api.STACK_NAME: "test-stack", @@ -122,8 +143,11 @@ TEST_DBFS_FILE_RESOURCE, TEST_DBFS_DIR_RESOURCE], api.STACK_DEPLOYED: [TEST_JOB_STATUS, + TEST_WORKSPACE_NB_STATUS, + TEST_WORKSPACE_DIR_STATUS, TEST_DBFS_FILE_STATUS, - TEST_DBFS_DIR_STATUS] + TEST_DBFS_DIR_STATUS, + ] } @@ -140,13 +164,14 @@ def get_job(self, job_id): return self.jobs_in_databricks[job_id] def reset_job(self, data): - if data['job_id'] not in self.jobs_in_databricks: + if data[api.JOBS_RESOURCE_JOB_ID] not in self.jobs_in_databricks: raise HTTPError('Job Not Found') - self.jobs_in_databricks[data['job_id']]['job_settings'] = data['new_settings'] + self.jobs_in_databricks[data[api.JOBS_RESOURCE_JOB_ID]]['job_settings'] = \ + data['new_settings'] def create_job(self, job_settings): job_id = self.available_job_id.pop() - new_job_json = {'job_id': job_id, + new_job_json = {api.JOBS_RESOURCE_JOB_ID: job_id, 'job_settings': job_settings.copy(), 'creator_user_name': 'testuser@example.com', 
'created_time': 987654321} @@ -262,13 +287,16 @@ def test_deploy_job(self, stack_api): jobs with the same name that exist. """ test_job_settings = TEST_JOB_SETTINGS - alt_test_job_settings = {'name': 'alt test job'} # Different name than TEST_JOB_SETTINGS + # Different name than TEST_JOB_SETTINGS + alt_test_job_settings = {api.JOBS_RESOURCE_NAME: 'alt test job'} stack_api.jobs_client = _TestJobsClient() # TEST CASE 1: # stack_api._deploy_job should create job if physical_id not given job doesn't exist res_physical_id_1, res_deploy_output_1 = stack_api._deploy_job(test_job_settings) - assert stack_api.jobs_client.get_job(res_physical_id_1['job_id']) == res_deploy_output_1 - assert res_deploy_output_1['job_id'] == res_physical_id_1['job_id'] + assert stack_api.jobs_client.get_job(res_physical_id_1[api.JOBS_RESOURCE_JOB_ID]) == \ + res_deploy_output_1 + assert res_deploy_output_1[api.JOBS_RESOURCE_JOB_ID] == \ + res_physical_id_1[api.JOBS_RESOURCE_JOB_ID] assert test_job_settings == res_deploy_output_1['job_settings'] # TEST CASE 2: @@ -276,8 +304,10 @@ def test_deploy_job(self, stack_api): res_physical_id_2, res_deploy_output_2 = stack_api._deploy_job(alt_test_job_settings, res_physical_id_1) # physical job id not changed from last update - assert res_physical_id_2['job_id'] == res_physical_id_1['job_id'] - assert res_deploy_output_2['job_id'] == res_physical_id_2['job_id'] + assert res_physical_id_2[api.JOBS_RESOURCE_JOB_ID] == \ + res_physical_id_1[api.JOBS_RESOURCE_JOB_ID] + assert res_deploy_output_2[api.JOBS_RESOURCE_JOB_ID] == \ + res_physical_id_2[api.JOBS_RESOURCE_JOB_ID] assert alt_test_job_settings == res_deploy_output_2['job_settings'] # TEST CASE 3: @@ -286,8 +316,10 @@ def test_deploy_job(self, stack_api): alt_test_job_settings['new_property'] = 'new_property_value' res_physical_id_3, res_deploy_output_3 = stack_api._deploy_job(alt_test_job_settings) # physical job id not changed from last update - assert res_physical_id_3['job_id'] == res_physical_id_2['job_id'] - assert res_deploy_output_3['job_id'] == res_physical_id_3['job_id'] + assert res_physical_id_3[api.JOBS_RESOURCE_JOB_ID] == \ + res_physical_id_2[api.JOBS_RESOURCE_JOB_ID] + assert res_deploy_output_3[api.JOBS_RESOURCE_JOB_ID] == \ + res_physical_id_3[api.JOBS_RESOURCE_JOB_ID] assert alt_test_job_settings == res_deploy_output_3['job_settings'] # TEST CASE 4 @@ -295,7 +327,7 @@ def test_deploy_job(self, stack_api): # databricks, an error should be raised # Add new job with different physical id but same name settings as alt_test_job_settings stack_api.jobs_client.jobs_in_databricks[123] = { - 'job_id': 123, + api.JOBS_RESOURCE_JOB_ID: 123, 'job_settings': alt_test_job_settings } with pytest.raises(StackError): @@ -315,26 +347,32 @@ def test_deploy_workspace(self, stack_api, tmpdir): test_workspace_nb_properties = TEST_WORKSPACE_NB_PROPERTIES.copy() test_workspace_nb_properties.update( - {'source_path': os.path.join(tmpdir.strpath, - test_workspace_nb_properties['source_path'])}) - os.makedirs(os.path.dirname(test_workspace_nb_properties['source_path'])) - with open(test_workspace_nb_properties['source_path'], 'w') as f: + {api.WORKSPACE_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, + test_workspace_nb_properties[ + api.WORKSPACE_RESOURCE_SOURCE_PATH + ])}) + os.makedirs( + os.path.dirname(test_workspace_nb_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH])) + with open(test_workspace_nb_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH], 'w') as f: f.write("print('test')\n") test_workspace_dir_properties = 
TEST_WORKSPACE_DIR_PROPERTIES.copy() test_workspace_dir_properties.update( - {'source_path': os.path.join(tmpdir.strpath, - test_workspace_dir_properties['source_path'])}) - os.makedirs(test_workspace_dir_properties['source_path']) + {api.WORKSPACE_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, + test_workspace_dir_properties[ + api.WORKSPACE_RESOURCE_SOURCE_PATH + ])}) + os.makedirs(test_workspace_dir_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH]) # Test Input of Workspace directory properties. dir_physical_id, dir_deploy_output = \ stack_api._deploy_workspace(test_workspace_dir_properties, None, True) stack_api.workspace_client.import_workspace_dir.assert_called_once() assert stack_api.workspace_client.import_workspace_dir.call_args[0][0] == \ - test_workspace_dir_properties['source_path'] + test_workspace_dir_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH] assert stack_api.workspace_client.import_workspace_dir.call_args[0][1] == \ - test_workspace_dir_properties['path'] - assert dir_physical_id == {'path': test_workspace_dir_properties['path']} + test_workspace_dir_properties[api.WORKSPACE_RESOURCE_PATH] + assert dir_physical_id == { + api.WORKSPACE_RESOURCE_PATH: test_workspace_dir_properties[api.WORKSPACE_RESOURCE_PATH]} assert dir_deploy_output == test_deploy_output # Test Input of Workspace notebook properties. @@ -342,19 +380,21 @@ def test_deploy_workspace(self, stack_api, tmpdir): stack_api._deploy_workspace(test_workspace_nb_properties, None, True) stack_api.workspace_client.import_workspace.assert_called_once() assert stack_api.workspace_client.import_workspace.call_args[0][0] == \ - test_workspace_nb_properties['source_path'] + test_workspace_nb_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH] assert stack_api.workspace_client.import_workspace.call_args[0][1] == \ - test_workspace_nb_properties['path'] - assert nb_physical_id == {'path': test_workspace_nb_properties['path']} + test_workspace_nb_properties[api.WORKSPACE_RESOURCE_PATH] + assert nb_physical_id == {api.WORKSPACE_RESOURCE_PATH: + test_workspace_nb_properties[api.WORKSPACE_RESOURCE_PATH]} assert nb_deploy_output == test_deploy_output # Should raise error if resource object_type doesn't match actually is in filesystem. 
- test_workspace_dir_properties.update({'object_type': 'NOTEBOOK'}) + test_workspace_dir_properties.update( + {api.WORKSPACE_RESOURCE_OBJECT_TYPE: workspace_api.NOTEBOOK}) with pytest.raises(StackError): stack_api._deploy_workspace(test_workspace_dir_properties, None, True) # Should raise error if object_type is not NOTEBOOK or DIRECTORY - test_workspace_dir_properties.update({'object_type': 'INVALID_TYPE'}) + test_workspace_dir_properties.update({api.WORKSPACE_RESOURCE_OBJECT_TYPE: 'INVALID_TYPE'}) with pytest.raises(StackError): stack_api._deploy_workspace(test_workspace_dir_properties, None, True) @@ -371,15 +411,17 @@ def test_deploy_dbfs(self, stack_api, tmpdir): test_dbfs_file_properties = TEST_DBFS_FILE_PROPERTIES.copy() test_dbfs_file_properties.update( - {'source_path': os.path.join(tmpdir.strpath, - test_dbfs_file_properties['source_path'])}) - with open(test_dbfs_file_properties['source_path'], 'w') as f: + {api.DBFS_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, + test_dbfs_file_properties[ + api.DBFS_RESOURCE_SOURCE_PATH])}) + with open(test_dbfs_file_properties[api.DBFS_RESOURCE_SOURCE_PATH], 'w') as f: f.write("print('test')\n") test_dbfs_dir_properties = TEST_DBFS_DIR_PROPERTIES.copy() test_dbfs_dir_properties.update( - {'source_path': os.path.join(tmpdir.strpath, - test_dbfs_dir_properties['source_path'])}) - os.makedirs(test_dbfs_dir_properties['source_path']) + {api.DBFS_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, + test_dbfs_dir_properties[ + api.DBFS_RESOURCE_SOURCE_PATH])}) + os.makedirs(test_dbfs_dir_properties[api.DBFS_RESOURCE_SOURCE_PATH]) dir_physical_id, dir_deploy_output = \ stack_api._deploy_dbfs(test_dbfs_dir_properties, None, True) @@ -387,10 +429,11 @@ def test_deploy_dbfs(self, stack_api, tmpdir): assert stack_api.dbfs_client.cp.call_args[1]['recursive'] is True assert stack_api.dbfs_client.cp.call_args[1]['overwrite'] is True assert stack_api.dbfs_client.cp.call_args[1]['src'] == \ - test_dbfs_dir_properties['source_path'] + test_dbfs_dir_properties[api.DBFS_RESOURCE_SOURCE_PATH] assert stack_api.dbfs_client.cp.call_args[1]['dst'] == \ - test_dbfs_dir_properties['path'] - assert dir_physical_id == {'path': test_dbfs_dir_properties['path']} + test_dbfs_dir_properties[api.DBFS_RESOURCE_PATH] + assert dir_physical_id == {api.DBFS_RESOURCE_PATH: + test_dbfs_dir_properties[api.DBFS_RESOURCE_PATH]} assert dir_deploy_output == test_deploy_output nb_physical_id, nb_deploy_output = \ @@ -399,15 +442,16 @@ def test_deploy_dbfs(self, stack_api, tmpdir): assert stack_api.dbfs_client.cp.call_args[1]['recursive'] is False assert stack_api.dbfs_client.cp.call_args[1]['overwrite'] is True assert stack_api.dbfs_client.cp.call_args[1]['src'] == \ - test_dbfs_file_properties['source_path'] + test_dbfs_file_properties[api.DBFS_RESOURCE_SOURCE_PATH] assert stack_api.dbfs_client.cp.call_args[1]['dst'] == \ - test_dbfs_file_properties['path'] - assert nb_physical_id == {'path': test_dbfs_file_properties['path']} + test_dbfs_file_properties[api.DBFS_RESOURCE_PATH] + assert nb_physical_id == {api.DBFS_RESOURCE_PATH: + test_dbfs_file_properties[api.DBFS_RESOURCE_PATH]} assert nb_deploy_output == test_deploy_output # Should raise error if resource properties is_dir field isn't consistent with whether the # resource is a directory or not locally. 
- test_dbfs_dir_properties.update({'is_dir': False}) + test_dbfs_dir_properties.update({api.DBFS_RESOURCE_IS_DIR: False}) with pytest.raises(StackError): stack_api._deploy_dbfs(test_dbfs_dir_properties, None, True) @@ -419,7 +463,7 @@ def test_deploy_resource(self, stack_api): # TODO(alinxie) Change this test to directly call stack_api.deploy/stack_api.deploy_config # A job resource should have _deploy_resource call on _deploy_job stack_api._deploy_job = mock.MagicMock() - test_job_physical_id = {'job_id': 12345} + test_job_physical_id = {api.JOBS_RESOURCE_JOB_ID: 12345} stack_api._deploy_job.return_value = (test_job_physical_id, {}) test_job_resource_status = {api.RESOURCE_PHYSICAL_ID: test_job_physical_id} new_resource_status = stack_api._deploy_resource(TEST_JOB_RESOURCE, @@ -434,7 +478,7 @@ def test_deploy_resource(self, stack_api): # A workspace resource should have _deploy_resource call on _deploy_workspace stack_api._deploy_workspace = mock.MagicMock() - test_workspace_physical_id = {'path': '/test/path'} + test_workspace_physical_id = {api.WORKSPACE_RESOURCE_PATH: '/test/path'} stack_api._deploy_workspace.return_value = (test_workspace_physical_id, {}) test_workspace_resource_status = {api.RESOURCE_PHYSICAL_ID: test_workspace_physical_id} stack_api._deploy_resource(TEST_WORKSPACE_NB_RESOURCE, @@ -480,31 +524,36 @@ def test_download_workspace(self, stack_api, tmpdir): test_workspace_nb_properties = TEST_WORKSPACE_NB_PROPERTIES.copy() test_workspace_nb_properties.update( - {'source_path': os.path.join(tmpdir.strpath, - test_workspace_nb_properties['source_path'])}) + {api.WORKSPACE_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, + test_workspace_nb_properties[ + api.WORKSPACE_RESOURCE_SOURCE_PATH + ])}) test_workspace_dir_properties = TEST_WORKSPACE_DIR_PROPERTIES.copy() test_workspace_dir_properties.update( - {'source_path': os.path.join(tmpdir.strpath, - test_workspace_dir_properties['source_path'])}) + {api.WORKSPACE_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, + test_workspace_dir_properties[ + api.WORKSPACE_RESOURCE_SOURCE_PATH + ])}) stack_api._download_workspace(test_workspace_dir_properties, True) stack_api.workspace_client.export_workspace_dir.assert_called_once() assert stack_api.workspace_client.export_workspace_dir.call_args[0][0] == \ - test_workspace_dir_properties['path'] + test_workspace_dir_properties[api.WORKSPACE_RESOURCE_PATH] assert stack_api.workspace_client.export_workspace_dir.call_args[0][1] == \ - test_workspace_dir_properties['source_path'] + test_workspace_dir_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH] stack_api._download_workspace(test_workspace_nb_properties, True) stack_api.workspace_client.export_workspace.assert_called_once() - created_dir = os.path.dirname(os.path.abspath(test_workspace_nb_properties['source_path'])) + created_dir = os.path.dirname( + os.path.abspath(test_workspace_nb_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH])) assert os.path.exists(created_dir) assert stack_api.workspace_client.export_workspace.call_args[0][0] == \ - test_workspace_nb_properties['path'] + test_workspace_nb_properties[api.WORKSPACE_RESOURCE_PATH] assert stack_api.workspace_client.export_workspace.call_args[0][1] == \ - test_workspace_nb_properties['source_path'] + test_workspace_nb_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH] # Should raise error if object_type is not NOTEBOOK or DIRECTORY - test_workspace_dir_properties.update({'object_type': 'INVALID_TYPE'}) + test_workspace_dir_properties.update({api.WORKSPACE_RESOURCE_OBJECT_TYPE: 
'INVALID_TYPE'}) with pytest.raises(StackError): stack_api._download_workspace(test_workspace_dir_properties, True) @@ -529,3 +578,69 @@ def test_download_resource(self, stack_api): api.RESOURCE_PROPERTIES: {'test': 'test'} } stack_api._download_resource(resource_badservice) + + def test_deploy_config(self, stack_api, tmpdir): + """ + The stack status generated from a correctly set up stack passed through deployment + in stack_api should pass the validation assertions within the deployment procedure + along with passing some correctness criteria that will be tested here. + """ + test_deploy_output = {'test': 'test'} + # Setup mocks for job resource deployment + stack_api._update_job = mock.MagicMock() + stack_api._update_job.return_value = 12345 + stack_api._put_job = mock.MagicMock() + stack_api._put_job.return_value = 12345 + stack_api.jobs_client.get_job = mock.MagicMock() + stack_api.jobs_client.get_job.return_value = test_deploy_output + + # Setup mocks for workspace resource deployment + stack_api.workspace_client.import_workspace = mock.MagicMock() + stack_api.workspace_client.import_workspace_dir = mock.MagicMock() + stack_api.workspace_client.client.get_status = mock.MagicMock() + stack_api.workspace_client.client.get_status.return_value = test_deploy_output + + # Setup mocks for dbfs resource deployment + stack_api.dbfs_client.cp = mock.MagicMock() + stack_api.dbfs_client.client = mock.MagicMock() + stack_api.dbfs_client.client.get_status.return_value = test_deploy_output + + # Create files and directories associated with workspace and dbfs resources to ensure + # that validations within resource-specific services pass. + test_stack = copy.deepcopy(TEST_STACK) + for resource in test_stack[api.STACK_RESOURCES]: + resource_service = resource[api.RESOURCE_SERVICE] + resource_properties = resource[api.RESOURCE_PROPERTIES] + curr_source_path = resource_properties.get(api.DBFS_RESOURCE_SOURCE_PATH, '') + resource_properties.update( + {api.DBFS_RESOURCE_SOURCE_PATH: os.path.join(tmpdir.strpath, curr_source_path)}) + if resource_service == api.WORKSPACE_SERVICE: + if workspace_api.NOTEBOOK == \ + resource_properties[api.WORKSPACE_RESOURCE_OBJECT_TYPE]: + os.makedirs(os.path.dirname(resource_properties[ + api.WORKSPACE_RESOURCE_SOURCE_PATH])) + with open(resource_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH], 'w') as f: + f.write("print('test')\n") + if resource_properties[api.WORKSPACE_RESOURCE_OBJECT_TYPE] == \ + workspace_api.DIRECTORY: + os.makedirs(resource_properties[api.WORKSPACE_RESOURCE_SOURCE_PATH]) + elif resource_service == api.DBFS_SERVICE: + if resource_properties[api.DBFS_RESOURCE_IS_DIR]: + os.makedirs(resource_properties[api.DBFS_RESOURCE_SOURCE_PATH]) + else: + with open(resource_properties[api.DBFS_RESOURCE_SOURCE_PATH], 'w') as f: + f.write("print('test')\n") + + new_stack_status_1 = stack_api.deploy_config(test_stack) + # new stack status should have an identical list of "resources" + assert new_stack_status_1.get(api.STACK_RESOURCES) == test_stack.get(api.STACK_RESOURCES) + for deployed_resource in new_stack_status_1.get(api.STACK_DEPLOYED): + # All functions to pull the deployed output were mocked to return deploy_output + assert deployed_resource.get(api.RESOURCE_DEPLOY_OUTPUT) == test_deploy_output + + # stack_api.deploy_config should create a valid stack status when given an existing + # stack_status + new_stack_status_2 = stack_api.deploy_config(test_stack, stack_status=TEST_STATUS) + assert new_stack_status_2.get(api.STACK_RESOURCES) == 
test_stack.get(api.STACK_RESOURCES) + for deployed_resource in new_stack_status_2.get(api.STACK_DEPLOYED): + assert deployed_resource.get(api.RESOURCE_DEPLOY_OUTPUT) == test_deploy_output From a3de498cf9d2f719fb2a53340962d3a3061e9d6c Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Mon, 6 Aug 2018 17:21:13 -0700 Subject: [PATCH 15/17] Clarified global variable comments --- databricks_cli/stack/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/databricks_cli/stack/api.py b/databricks_cli/stack/api.py index cd4fe0f3..95c8367e 100644 --- a/databricks_cli/stack/api.py +++ b/databricks_cli/stack/api.py @@ -46,14 +46,14 @@ # Config Outer Fields STACK_NAME = 'name' STACK_RESOURCES = 'resources' -STACK_DEPLOYED = 'deployed' +STACK_DEPLOYED = 'deployed' # For Stack Status # Resource Fields RESOURCE_ID = 'id' RESOURCE_SERVICE = 'service' RESOURCE_PROPERTIES = 'properties' -# Deployed Resource Fields +# Resource Status Fields RESOURCE_PHYSICAL_ID = 'physical_id' RESOURCE_DEPLOY_OUTPUT = 'deploy_output' RESOURCE_DEPLOY_TIMESTAMP = 'timestamp' From 1da99acd9608c2f465853e27cd8827e33afc5e6f Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Tue, 7 Aug 2018 11:32:32 -0700 Subject: [PATCH 16/17] Change in message, change in function name --- databricks_cli/stack/cli.py | 2 +- tests/stack/test_cli.py | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/databricks_cli/stack/cli.py b/databricks_cli/stack/cli.py index 70e1a8ac..6c110ef0 100644 --- a/databricks_cli/stack/cli.py +++ b/databricks_cli/stack/cli.py @@ -103,7 +103,7 @@ def deploy(api_client, config_path, **kwargs): os.chdir(cli_dir) click.echo("Saving stack status to {}".format(status_path)) _save_json(status_path, new_stack_status) - click.echo('Warning- The stack status file is an automatically generated file that is' + click.echo('Note: The stack status file is an automatically generated file that is' ' depended on for the databricks stack CLI to function correctly.' ' Please do not edit the file.') click.echo('#' * 80) diff --git a/tests/stack/test_cli.py b/tests/stack/test_cli.py index a21b9fc7..99f979dc 100644 --- a/tests/stack/test_cli.py +++ b/tests/stack/test_cli.py @@ -37,7 +37,7 @@ TEST_STACK_PATH = 'stack/stack.json' -def _write_stack(tmpdir): +def _write_test_stack_config(tmpdir): """ Utility function to store the stack config in the filesystem and returns the config path. 
""" @@ -63,7 +63,7 @@ def test_deploy_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command should call the deploy function of the stack API and pass in possible kwargs into deploy function """ - config_path = _write_stack(tmpdir) + config_path = _write_test_stack_config(tmpdir) stack_api_mock.deploy = mock.MagicMock() runner = CliRunner() runner.invoke(cli.deploy, ['--overwrite', config_path]) @@ -79,7 +79,7 @@ def test_download_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command should call the deploy function of the stack API and pass in possible kwargs into deploy function """ - config_path = _write_stack(tmpdir) + config_path = _write_test_stack_config(tmpdir) stack_api_mock.download = mock.MagicMock() runner = CliRunner() runner.invoke(cli.download, ['--overwrite', config_path]) @@ -95,7 +95,7 @@ def test_deploy_default_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command without flags should call the deploy function of the stack API and pass in default kwargs into deploy function """ - config_path = _write_stack(tmpdir) + config_path = _write_test_stack_config(tmpdir) stack_api_mock.deploy = mock.MagicMock() runner = CliRunner() runner.invoke(cli.deploy, [config_path]) @@ -111,7 +111,7 @@ def test_download_default_kwargs(stack_api_mock, tmpdir): Calling the cli.deploy command should call the deploy function of the stack API and pass in possible kwargs into deploy function """ - config_path = _write_stack(tmpdir) + config_path = _write_test_stack_config(tmpdir) stack_api_mock.download = mock.MagicMock() runner = CliRunner() runner.invoke(cli.download, [config_path]) @@ -129,7 +129,7 @@ def test_deploy_relative_paths(stack_api_mock, tmpdir): contained so that relative paths for resources can be relative to the stack config instead of where CLI calls the API functions. """ - config_path = _write_stack(tmpdir) + config_path = _write_test_stack_config(tmpdir) config_working_dir = os.path.dirname(config_path) def _deploy(*args, **kwargs): @@ -148,7 +148,7 @@ def test_download_relative_paths(stack_api_mock, tmpdir): contained so that relative paths for resources can be relative to the stack config instead of where CLI calls the API functions. """ - config_path = _write_stack(tmpdir) + config_path = _write_test_stack_config(tmpdir) config_working_dir = os.path.dirname(config_path) def _download(*args, **kwargs): From e09c3ec46e3c1f4eee2b7fecc41e0706837afe1b Mon Sep 17 00:00:00 2001 From: Andrew Linxie Date: Wed, 8 Aug 2018 14:53:46 -0700 Subject: [PATCH 17/17] Added some more descriptions to CLI --- databricks_cli/stack/cli.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/databricks_cli/stack/cli.py b/databricks_cli/stack/cli.py index 6c110ef0..34797aaa 100644 --- a/databricks_cli/stack/cli.py +++ b/databricks_cli/stack/cli.py @@ -78,7 +78,7 @@ def _save_json(path, data): @click.command(context_settings=CONTEXT_SETTINGS, - short_help='Deploy stack given a JSON configuration of the stack') + short_help='Deploy a stack given a JSON configuration of the stack') @click.argument('config_path', type=click.Path(exists=True), required=True) @click.option('--overwrite', '-o', is_flag=True, default=False, show_default=True, help='Include to overwrite existing workspace notebooks and dbfs files') @@ -89,6 +89,13 @@ def _save_json(path, data): def deploy(api_client, config_path, **kwargs): """ Deploy a stack to the databricks workspace given a JSON stack configuration template. 
+
+    After deployment, a stack status will be saved at .deployed.json. Please
+    do not edit or move the file as it is generated through the CLI and is used for future
+    deployments of the stack. If you run into errors with the stack status at deployment,
+    please delete the stack status file and try the deployment again. If the problem persists,
+    please raise a GitHub issue on the Databricks CLI repository at
+    https://www.github.com/databricks/databricks-cli/issues
     """
     click.echo('#' * 80)
     click.echo('Deploying stack at: {} with options: {}'.format(config_path, kwargs))
@@ -110,7 +117,7 @@ def deploy(api_client, config_path, **kwargs):
 @click.command(context_settings=CONTEXT_SETTINGS,
-               short_help='Download stack given a JSON configuration of the stack')
+               short_help='Download stack notebooks given a JSON configuration of the stack')
 @click.argument('config_path', type=click.Path(exists=True), required=True)
 @click.option('--overwrite', '-o', is_flag=True, help='Include to overwrite existing'
               ' notebooks in the local filesystem.')
@@ -120,7 +127,8 @@ def deploy(api_client, config_path, **kwargs):
 @provide_api_client
 def download(api_client, config_path, **kwargs):
     """
-    Download a stack to the local filesystem given a JSON stack configuration template.
+    Download workspace notebooks of a stack to the local filesystem given a JSON stack
+    configuration template.
     """
     click.echo('#' * 80)
     click.echo('Downloading stack at: {} with options: {}'.format(config_path, kwargs))
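
For context, both the deploy and download commands above take a JSON stack configuration template as their argument. The sketch below shows the rough shape such a template appears to have, inferred only from the field constants used in these patches ('name', 'resources', 'id', 'service', 'properties', 'source_path', 'path', 'object_type', 'is_dir'); the service string values, the CLI invocation, and every concrete path are illustrative assumptions, not part of this change.

# Illustrative sketch only: a minimal stack configuration template of the shape the
# stack CLI's deploy/download commands appear to consume. All values below are
# hypothetical examples.
import json

stack_config = {
    'name': 'example-stack',
    'resources': [
        {
            'id': 'example-notebook',
            'service': 'workspace',  # assumed string value of api.WORKSPACE_SERVICE
            'properties': {
                'source_path': 'notebooks/example.py',          # local path, relative to this template
                'path': '/Users/someone@example.com/example',   # target path in the Databricks workspace
                'object_type': 'NOTEBOOK',                      # NOTEBOOK or DIRECTORY
            },
        },
        {
            'id': 'example-data',
            'service': 'dbfs',  # assumed string value of api.DBFS_SERVICE
            'properties': {
                'source_path': 'data',          # local directory
                'path': 'dbfs:/example/data',   # target path in DBFS
                'is_dir': True,
            },
        },
    ],
}

# Write the template to disk so the stack CLI can consume it.
with open('example-stack.json', 'w') as f:
    json.dump(stack_config, f, indent=2)

# The template would then presumably be deployed with something like:
#   databricks stack deploy ./example-stack.json --overwrite
# which also writes the auto-generated stack status file described above;
# that file should not be edited by hand.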