diff --git a/ggshield/cmd/iac/scan/diff.py b/ggshield/cmd/iac/scan/diff.py index 3ca3084f6f..6962e0e97c 100644 --- a/ggshield/cmd/iac/scan/diff.py +++ b/ggshield/cmd/iac/scan/diff.py @@ -29,7 +29,7 @@ from ggshield.core.scan import ScanContext, ScanMode from ggshield.core.tar_utils import INDEX_REF, get_empty_tar from ggshield.core.text_utils import display_info -from ggshield.utils.files import is_filepath_excluded +from ggshield.utils.files import is_path_excluded from ggshield.utils.git_shell import ( Filemode, get_diff_files_status, @@ -161,7 +161,7 @@ def iac_scan_diff( file for file, mode in files_status.items() if mode in modified_modes - and not is_filepath_excluded(file, exclusion_regexes) + and not is_path_excluded(file, exclusion_regexes) and is_iac_file_path(file) ] diff --git a/ggshield/cmd/iac/scan/iac_scan_utils.py b/ggshield/cmd/iac/scan/iac_scan_utils.py index 2bc13fbc35..7ae2be3c57 100644 --- a/ggshield/cmd/iac/scan/iac_scan_utils.py +++ b/ggshield/cmd/iac/scan/iac_scan_utils.py @@ -14,7 +14,7 @@ from ggshield.core.filter import init_exclusion_regexes from ggshield.core.tar_utils import INDEX_REF, tar_from_ref_and_filepaths from ggshield.core.text_utils import display_error -from ggshield.utils.files import is_filepath_excluded +from ggshield.utils.files import is_path_excluded from ggshield.utils.git_shell import get_filepaths_from_ref, get_staged_filepaths from ggshield.verticals.iac.collection.iac_scan_collection import IaCResult from ggshield.verticals.iac.filter import is_iac_file_path @@ -62,7 +62,7 @@ def _accept_iac_file_on_path( ) -> bool: return is_iac_file_path(path) and ( exclusion_regexes is None - or not is_filepath_excluded(directory / path, exclusion_regexes) + or not is_path_excluded(directory / path, exclusion_regexes) ) @@ -138,7 +138,7 @@ def augment_unignored_issues( file_path = file_result.filename file_ignored_until = None for outdated_ignored_path in outdated_ignored_paths_dicts: - if is_filepath_excluded(file_path, outdated_ignored_path["regex"]): + if is_path_excluded(file_path, outdated_ignored_path["regex"]): if ( file_ignored_until is None or file_ignored_until < outdated_ignored_path["until"] diff --git a/ggshield/cmd/utils/files.py b/ggshield/cmd/utils/files.py index ba36098293..17ab1bf0a7 100644 --- a/ggshield/cmd/utils/files.py +++ b/ggshield/cmd/utils/files.py @@ -4,9 +4,9 @@ from click import UsageError -from ggshield.utils.files import is_filepath_excluded +from ggshield.utils.files import is_path_excluded def check_directory_not_ignored(directory: Path, exclusion_regexes: Set[re.Pattern]): - if is_filepath_excluded(directory, exclusion_regexes): + if is_path_excluded(directory.resolve(), exclusion_regexes): raise UsageError("An ignored file or directory cannot be scanned.") diff --git a/ggshield/core/config/utils.py b/ggshield/core/config/utils.py index 5d73c46f27..79ac09cb73 100644 --- a/ggshield/core/config/utils.py +++ b/ggshield/core/config/utils.py @@ -11,8 +11,9 @@ DEFAULT_CONFIG_FILENAME, USER_CONFIG_FILENAMES, ) -from ggshield.core.dirs import get_config_dir, get_user_home_dir +from ggshield.core.dirs import get_config_dir, get_project_root_dir, get_user_home_dir from ggshield.core.errors import UnexpectedError +from ggshield.utils.git_shell import GitExecutableNotFound def replace_in_keys(data: Union[List, Dict], old_char: str, new_char: str) -> None: @@ -93,8 +94,12 @@ def find_global_config_path(*, to_write: bool = False) -> Optional[Path]: def find_local_config_path() -> Optional[Path]: + try: + project_root_dir = get_project_root_dir(Path()) + except GitExecutableNotFound: + project_root_dir = Path() for filename in USER_CONFIG_FILENAMES: - path = Path(filename) + path = project_root_dir / filename if path.exists(): return path return None diff --git a/ggshield/core/dirs.py b/ggshield/core/dirs.py index fde1103a36..036b5149f0 100644 --- a/ggshield/core/dirs.py +++ b/ggshield/core/dirs.py @@ -35,9 +35,7 @@ def get_cache_dir() -> Path: def get_project_root_dir(path: Path) -> Path: - """Vulnerability path are relative to either git root path or - path provided for the scan. - + """ Returns the source basedir required to find file within filesystem. """ try: diff --git a/ggshield/core/scan/commit_utils.py b/ggshield/core/scan/commit_utils.py index f9357bf99a..e30d25e27b 100644 --- a/ggshield/core/scan/commit_utils.py +++ b/ggshield/core/scan/commit_utils.py @@ -3,7 +3,7 @@ from pathlib import Path from typing import Iterable, List, Optional, Set -from ggshield.utils.files import is_filepath_excluded +from ggshield.utils.files import is_path_excluded from ggshield.utils.git_shell import Filemode from .scannable import Scannable @@ -213,7 +213,7 @@ def parse_patch( diffs = re.split(r"^diff ", rest, flags=re.MULTILINE) for file_info, diff in zip(header.files, diffs): - if is_filepath_excluded(file_info.path, exclusion_regexes): + if is_path_excluded(file_info.path, exclusion_regexes): continue # extract document from diff: we must skip diff extended headers diff --git a/ggshield/utils/files.py b/ggshield/utils/files.py index 94f14c7569..bf60a8bb47 100644 --- a/ggshield/utils/files.py +++ b/ggshield/utils/files.py @@ -13,11 +13,17 @@ def __init__(self, path: Path): self.path = path -def is_filepath_excluded( - filepath: Union[str, Path], exclusion_regexes: Set[re.Pattern] +def is_path_excluded( + path: Union[str, Path], exclusion_regexes: Set[re.Pattern] ) -> bool: - filepath = Path(filepath) - return any(r.search(str(PurePosixPath(filepath))) for r in exclusion_regexes) + path = Path(path) + if path.is_dir(): + # The directory exclusion regexes have to end with a slash + # To check if path is excluded, we need to add a trailing slash + path_string = f"{PurePosixPath(path)}/" + else: + path_string = str(PurePosixPath(path)) + return any(r.search(path_string) for r in exclusion_regexes) def get_filepaths( @@ -54,7 +60,7 @@ def get_filepaths( _targets = path.rglob(r"*") for file_path in _targets: - if not is_filepath_excluded(file_path, exclusion_regexes): + if not is_path_excluded(file_path, exclusion_regexes): targets.add(file_path) return targets diff --git a/ggshield/verticals/sca/file_selection.py b/ggshield/verticals/sca/file_selection.py index 0a4ff2cdc1..d64b3c8304 100644 --- a/ggshield/verticals/sca/file_selection.py +++ b/ggshield/verticals/sca/file_selection.py @@ -10,7 +10,7 @@ from ggshield.core.scan.file import get_files_from_paths from ggshield.core.tar_utils import INDEX_REF from ggshield.core.text_utils import display_info -from ggshield.utils.files import is_filepath_excluded +from ggshield.utils.files import is_path_excluded from ggshield.utils.git_shell import get_filepaths_from_ref, get_staged_filepaths @@ -85,7 +85,7 @@ def sca_files_from_git_repo( files=[ str(path) for path in all_files - if not is_filepath_excluded(path, exclusion_regexes) + if not is_path_excluded(path, exclusion_regexes) ] ) if isinstance(sca_files_result, Detail): diff --git a/tests/unit/cmd/iac/test_scan_all.py b/tests/unit/cmd/iac/test_scan_all.py index 192e75945b..248261aa54 100644 --- a/tests/unit/cmd/iac/test_scan_all.py +++ b/tests/unit/cmd/iac/test_scan_all.py @@ -13,6 +13,7 @@ from ggshield.__main__ import cli from ggshield.core.errors import ExitCode +from ggshield.utils.os import cd from tests.conftest import IAC_SINGLE_VULNERABILITY from tests.repository import Repository from tests.unit.conftest import assert_invoke_exited_with, my_vcr @@ -244,16 +245,15 @@ def test_iac_scan_all_verbose(cli_fs_runner: CliRunner, cli_command) -> None: @patch("pygitguardian.client.GGClient.iac_directory_scan") def test_iac_scan_all_ignored_directory( - iac_directory_scan_mock: Mock, cli_fs_runner: CliRunner, cli_command + iac_directory_scan_mock: Mock, cli_fs_runner: CliRunner, cli_command, tmp_path: Path ) -> None: """ GIVEN a directory which is ignored WHEN running the iac scan all command on this directory THEN an error is raised """ - path = Path(".") - repo = Repository.create(path) - iac_file = path / "iac_file.tf" + repo = Repository.create(tmp_path) + iac_file = tmp_path / "iac_file.tf" iac_file.write_text(IAC_SINGLE_VULNERABILITY) repo.add(iac_file) repo.create_commit() @@ -265,8 +265,8 @@ def test_iac_scan_all_ignored_directory( "scan", "all", "--ignore-path", - str(path), - str(path), + f"{tmp_path.name}/", + str(tmp_path), ], ) @@ -275,6 +275,50 @@ def test_iac_scan_all_ignored_directory( iac_directory_scan_mock.assert_not_called() +@patch("pygitguardian.client.GGClient.iac_directory_scan") +def test_iac_scan_all_ignored_directory_config( + iac_directory_scan_mock: Mock, cli_fs_runner: CliRunner, cli_command, tmp_path: Path +) -> None: + """ + GIVEN a directory which is ignored in the config + WHEN running the iac scan all command on this directory + THEN an error is raised + """ + repo = Repository.create(tmp_path) + dir = tmp_path / "dir" + dir.mkdir() + subdir = dir / "subdir" + subdir.mkdir() + iac_file = subdir / "iac_file.tf" + iac_file.write_text(IAC_SINGLE_VULNERABILITY) + repo.add(iac_file) + repo.create_commit() + + config = """ +version: 2 +iac: + ignored-paths: + - "dir/subdir/" + +""" + (tmp_path / ".gitguardian.yaml").write_text(config) + + with cd(str(dir)): + result = cli_fs_runner.invoke( + cli, + [ + "iac", + "scan", + "all", + "subdir", + ], + ) + + assert_invoke_exited_with(result, ExitCode.USAGE_ERROR) + assert "An ignored file or directory cannot be scanned." in result.stdout + iac_directory_scan_mock.assert_not_called() + + @patch("pygitguardian.GGClient.iac_directory_scan") def test_iac_scan_all_context_repository( scan_mock: Mock, tmp_path: Path, cli_fs_runner: CliRunner, cli_command diff --git a/tests/unit/cmd/iac/test_scan_diff.py b/tests/unit/cmd/iac/test_scan_diff.py index 4f90ef372a..1c8cccf74c 100644 --- a/tests/unit/cmd/iac/test_scan_diff.py +++ b/tests/unit/cmd/iac/test_scan_diff.py @@ -183,7 +183,7 @@ def test_iac_scan_diff_ignored_directory( "--ref", initial_commit, "--ignore-path", - str(path), + f"{path.name}/", str(path), ], ) diff --git a/tests/unit/cmd/sca/test_scan.py b/tests/unit/cmd/sca/test_scan.py index f24cf9576d..ac279eef33 100644 --- a/tests/unit/cmd/sca/test_scan.py +++ b/tests/unit/cmd/sca/test_scan.py @@ -386,7 +386,7 @@ def test_sca_text_handler_ordering(patch_scan_all, cli_fs_runner): @patch("pygitguardian.GGClient.compute_sca_files") -def test_scan_all_ignored_directory( +def test_sca_scan_all_ignored_directory( compute_sca_files_mock: Mock, cli_fs_runner: click.testing.CliRunner, dummy_sca_repo: Repository, @@ -404,7 +404,7 @@ def test_scan_all_ignored_directory( "scan", "all", "--ignore-path", - dummy_sca_repo.path.name, + f"{dummy_sca_repo.path.name}/", str(dummy_sca_repo.path), ], ) @@ -414,6 +414,53 @@ def test_scan_all_ignored_directory( compute_sca_files_mock.assert_not_called() +@patch("pygitguardian.GGClient.compute_sca_files") +def test_sca_scan_all_ignored_directory_config( + compute_sca_files_mock: Mock, + cli_fs_runner: CliRunner, + tmp_path: Path, + pipfile_lock_with_vuln: str, +) -> None: + """ + GIVEN a directory which is ignored in the config + WHEN running the sca scan all command on this directory + THEN an error is raised + """ + repo = Repository.create(tmp_path) + dir = tmp_path / "dir" + dir.mkdir() + subdir = dir / "subdir" + subdir.mkdir() + pipfile_lock = subdir / "Pipfile.lock" + pipfile_lock.write_text(pipfile_lock_with_vuln) + repo.add(pipfile_lock) + repo.create_commit() + + config = """ +version: 2 +sca: + ignored-paths: + - "dir/subdir/" + +""" + (tmp_path / ".gitguardian.yaml").write_text(config) + + with cd(str(dir)): + result = cli_fs_runner.invoke( + cli, + [ + "sca", + "scan", + "all", + "subdir", + ], + ) + + assert_invoke_exited_with(result, ExitCode.USAGE_ERROR) + assert "An ignored file or directory cannot be scanned." in result.stdout + compute_sca_files_mock.assert_not_called() + + @patch("pygitguardian.GGClient.compute_sca_files") def test_sca_scan_diff_ignored_directory( compute_sca_files_mock: Mock, @@ -435,7 +482,7 @@ def test_sca_scan_diff_ignored_directory( "--ref", "branch_without_vuln", "--ignore-path", - dummy_sca_repo.path.name, + f"{dummy_sca_repo.path.name}/", str(dummy_sca_repo.path), ], ) diff --git a/tests/unit/cmd/scan/test_path.py b/tests/unit/cmd/scan/test_path.py index fec1fdda5a..4a24471f99 100644 --- a/tests/unit/cmd/scan/test_path.py +++ b/tests/unit/cmd/scan/test_path.py @@ -197,7 +197,7 @@ def test_files_verbose_yes(self, cli_fs_runner): assert "No secrets have been found" in result.output @patch("ggshield.verticals.secret.secret_scanner.SecretScanner.scan") - def test_scan_ignored_directory(self, scan_mock, cli_fs_runner): + def test_scan_ignored_file(self, scan_mock, cli_fs_runner): self.create_files() config = """ version: 2 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 742016072d..933391e55c 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -16,6 +16,11 @@ from ggshield.core.cache import Cache from ggshield.core.url_utils import dashboard_to_api_url +from ggshield.utils.git_shell import ( + _get_git_path, + _git_rev_parse_absolute, + read_git_file, +) from tests.conftest import GG_VALID_TOKEN @@ -656,3 +661,10 @@ def make_fake_path_inaccessible(fs: FakeFilesystem, path: Union[str, Path]): # `force_unix_mode` is required for Windows. # See fs.chmod(path, 0o0000, force_unix_mode=True) + + +@pytest.fixture(autouse=True) +def clear_cache(): + _get_git_path.cache_clear() + _git_rev_parse_absolute.cache_clear() + read_git_file.cache_clear() diff --git a/tests/unit/core/config/test_utils.py b/tests/unit/core/config/test_utils.py index 9119976686..bed7437928 100644 --- a/tests/unit/core/config/test_utils.py +++ b/tests/unit/core/config/test_utils.py @@ -1,14 +1,18 @@ from dataclasses import dataclass, field +from pathlib import Path from typing import Any, Dict, List, Optional, Set import pytest from ggshield.core.config.utils import ( + find_local_config_path, remove_common_dict_items, remove_url_trailing_slash, replace_in_keys, update_from_other_instance, ) +from ggshield.utils.os import cd +from tests.repository import Repository def test_replace_in_keys(): @@ -112,3 +116,21 @@ def test_remove_common_dict_items( def test_remove_url_trailing_slash(): result = remove_url_trailing_slash("https://dashboard.gitguardian.com/") assert result == "https://dashboard.gitguardian.com" + + +def test_find_config_in_root(tmp_path: Path): + """ + GIVEN a repo with a config file in the root and a subdirectory + WHEN trying to find the local config while inside the subdirectory + THEN the config in the root is returned + """ + Repository.create(tmp_path) + + config_path = tmp_path / ".gitguardian.yml" + config_path.touch() + + dir_path = tmp_path / "dir" + dir_path.mkdir() + + with cd(str(dir_path)): + assert find_local_config_path() == config_path diff --git a/tests/unit/core/scan/test_scan_context.py b/tests/unit/core/scan/test_scan_context.py index 7e76a8f014..0eafa1d492 100644 --- a/tests/unit/core/scan/test_scan_context.py +++ b/tests/unit/core/scan/test_scan_context.py @@ -114,7 +114,9 @@ def test_ci_no_env(env, fake_url_repo: Repository) -> None: WHEN there is no repository url in the environment THEN the remote url is sent by default """ - with mock.patch.dict(os.environ, env, clear=True): + # Copying the path is needed for windows to find git + environ = {"PATH": os.environ.get("PATH"), **env} + with mock.patch.dict(os.environ, environ, clear=True): context = ScanContext( scan_mode=ScanMode.PATH, command_path="ggshield secret scan path", diff --git a/tests/unit/core/test_client.py b/tests/unit/core/test_client.py index 2a2c6f305c..c5f47ba49e 100644 --- a/tests/unit/core/test_client.py +++ b/tests/unit/core/test_client.py @@ -53,11 +53,15 @@ def test_retrieve_client_invalid_api_url(): THEN it raises a UsageError """ url = "no-scheme.com" + environ = os.environ.copy() + environ.pop("GITGUARDIAN_INSTANCE", None) + environ["GITGUARDIAN_API_URL"] = url + with pytest.raises( click.UsageError, match=f"Invalid scheme for API URL '{url}', expected HTTPS", ): - with patch.dict(os.environ, {"GITGUARDIAN_API_URL": url}): + with patch.dict(os.environ, environ, clear=True): create_client_from_config(Config()) diff --git a/tests/unit/utils/test_files.py b/tests/unit/utils/test_files.py index 3a05099414..6499f0c47f 100644 --- a/tests/unit/utils/test_files.py +++ b/tests/unit/utils/test_files.py @@ -8,7 +8,7 @@ import pytest from ggshield.core.tar_utils import get_empty_tar -from ggshield.utils.files import is_filepath_excluded +from ggshield.utils.files import is_path_excluded def test_get_empty_tar(): @@ -33,8 +33,8 @@ def test_get_empty_tar(): (Path("dir/foo"), {"foo"}, True), ], ) -def test_is_filepath_excluded( +def test_is_path_excluded( path: Union[str, Path], regexes: Set[str], excluded: bool ) -> None: regexes = {re.compile(x) for x in regexes} - assert is_filepath_excluded(path, regexes) == excluded + assert is_path_excluded(path, regexes) == excluded