diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d0b53e3710..52b5dcfb14a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,7 @@ The types of changes are: ### Fixed - Fixed bug with unescaped table names in mysql queries [#5072](https://github.com/ethyca/fides/pull/5072/) - Fixed bug with unresponsive messaging ui [#5081](https://github.com/ethyca/fides/pull/5081/) +- Fixed FidesKey constructor bugs in CLI [#5113](https://github.com/ethyca/fides/pull/5113) ## [2.40.0](https://github.com/ethyca/fides/compare/2.39.2...2.40.0) diff --git a/src/fides/core/annotate_dataset.py b/src/fides/core/annotate_dataset.py index 920fd405fe1..bc2541dc8bd 100644 --- a/src/fides/core/annotate_dataset.py +++ b/src/fides/core/annotate_dataset.py @@ -82,7 +82,7 @@ def get_data_categories_annotation( dataset_member, valid_categories ) - return [FidesKey(value) for value in user_response] + return [str(FidesKey(value)) for value in user_response] # type: ignore def annotate_dataset( diff --git a/src/fides/core/dataset.py b/src/fides/core/dataset.py index 9520273d842..fc28ccd823f 100644 --- a/src/fides/core/dataset.py +++ b/src/fides/core/dataset.py @@ -140,8 +140,10 @@ def make_dataset_key_unique( to avoid naming collisions. """ - dataset.fides_key = FidesKey( - generate_unique_fides_key(dataset.fides_key, database_host, database_name) + dataset.fides_key = str( # type: ignore + FidesKey( + generate_unique_fides_key(dataset.fides_key, database_host, database_name) + ) ) dataset.meta = {"database_host": database_host, "database_name": database_name} return dataset diff --git a/tests/ctl/cli/test_cli.py b/tests/ctl/cli/test_cli.py index ca3339b4971..86679e44d6b 100644 --- a/tests/ctl/cli/test_cli.py +++ b/tests/ctl/cli/test_cli.py @@ -4,7 +4,9 @@ from json import dump, loads from typing import Generator +import click import pytest +import yaml from click.testing import CliRunner from git.repo import Repo from py._path.local import LocalPath @@ -217,6 +219,61 @@ def test_pull_all( assert result.exit_code == 0 +@pytest.mark.integration +class TestAnnotate: + + def test_annotate( + self, + test_config_path: str, + test_cli_runner: CliRunner, + ) -> None: + """ + Test annotating dataset allowing you to interactively annotate the dataset with data categories + """ + with open( + "tests/ctl/data/dataset_missing_categories.yml", "r" + ) as current_dataset_yml: + dataset_yml = yaml.safe_load(current_dataset_yml) + # Confirm starting state, that the first field has no data categories + assert ( + "data_categories" + not in dataset_yml["dataset"][0]["collections"][0]["fields"][0] + ) + + result = test_cli_runner.invoke( + cli, + [ + "-f", + test_config_path, + "annotate", + "dataset", + "tests/ctl/data/dataset_missing_categories.yml", + ], + input="user\n", + ) + print(result.output) + with open("tests/ctl/data/dataset_missing_categories.yml", "r") as dataset_yml: + # Helps assert that the data category was output correctly + dataset_yml = yaml.safe_load(dataset_yml) + assert dataset_yml["dataset"][0]["collections"][0]["fields"][0][ + "data_categories" + ] == ["user"] + + # Now remove the data category that was written by annotate dataset + del dataset_yml["dataset"][0]["collections"][0]["fields"][0][ + "data_categories" + ] + + with open( + "tests/ctl/data/dataset_missing_categories.yml", "w" + ) as current_dataset_yml: + # Restore the original contents to the file + yaml.safe_dump(dataset_yml, current_dataset_yml) + + assert result.exit_code == 0 + print(result.output) + + @pytest.mark.integration def test_audit(test_config_path: str, test_cli_runner: CliRunner) -> None: result = test_cli_runner.invoke(cli, ["-f", test_config_path, "evaluate", "-a"]) @@ -666,6 +723,12 @@ def test_generate_dataset_db_with_connection_string( print(result.output) assert result.exit_code == 0 + with open(tmp_file, "r") as dataset_yml: + # Helps assert that the file was output correctly, namely, fides_keys were serialized as strings + # and not a FidesKey python object + dataset = yaml.safe_load(dataset_yml).get("dataset", []) + assert isinstance(dataset[0]["fides_key"], str) + @pytest.mark.integration def test_generate_dataset_db_with_credentials_id( self, diff --git a/tests/ctl/data/dataset_missing_categories.yml b/tests/ctl/data/dataset_missing_categories.yml new file mode 100644 index 00000000000..51aab93782f --- /dev/null +++ b/tests/ctl/data/dataset_missing_categories.yml @@ -0,0 +1,13 @@ +dataset: +- collections: + - description: Organization information + fields: + - name: city + - data_categories: + - account.contact.state + name: state + name: organization + description: Sample dataset to be annotated + fides_key: test_missing_data_categories + name: Sample Dataset + organization_fides_key: default_organization