Skip to content

Commit

Permalink
Merge pull request GitGuardian#857 from GitGuardian/jgriffe/SCA-1539/…
Browse files Browse the repository at this point in the history
…fix-ignored-paths-in-ggshield

fix: Fix ignored paths in GGShield
  • Loading branch information
gg-jonathangriffe authored Mar 11, 2024
2 parents 9c2d858 + f760485 commit 621f2c2
Show file tree
Hide file tree
Showing 17 changed files with 177 additions and 37 deletions.
4 changes: 2 additions & 2 deletions ggshield/cmd/iac/scan/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from ggshield.core.scan import ScanContext, ScanMode
from ggshield.core.tar_utils import INDEX_REF, get_empty_tar
from ggshield.core.text_utils import display_info
from ggshield.utils.files import is_filepath_excluded
from ggshield.utils.files import is_path_excluded
from ggshield.utils.git_shell import (
Filemode,
get_diff_files_status,
Expand Down Expand Up @@ -161,7 +161,7 @@ def iac_scan_diff(
file
for file, mode in files_status.items()
if mode in modified_modes
and not is_filepath_excluded(file, exclusion_regexes)
and not is_path_excluded(file, exclusion_regexes)
and is_iac_file_path(file)
]

Expand Down
6 changes: 3 additions & 3 deletions ggshield/cmd/iac/scan/iac_scan_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from ggshield.core.filter import init_exclusion_regexes
from ggshield.core.tar_utils import INDEX_REF, tar_from_ref_and_filepaths
from ggshield.core.text_utils import display_error
from ggshield.utils.files import is_filepath_excluded
from ggshield.utils.files import is_path_excluded
from ggshield.utils.git_shell import get_filepaths_from_ref, get_staged_filepaths
from ggshield.verticals.iac.collection.iac_scan_collection import IaCResult
from ggshield.verticals.iac.filter import is_iac_file_path
Expand Down Expand Up @@ -62,7 +62,7 @@ def _accept_iac_file_on_path(
) -> bool:
return is_iac_file_path(path) and (
exclusion_regexes is None
or not is_filepath_excluded(directory / path, exclusion_regexes)
or not is_path_excluded(directory / path, exclusion_regexes)
)


Expand Down Expand Up @@ -138,7 +138,7 @@ def augment_unignored_issues(
file_path = file_result.filename
file_ignored_until = None
for outdated_ignored_path in outdated_ignored_paths_dicts:
if is_filepath_excluded(file_path, outdated_ignored_path["regex"]):
if is_path_excluded(file_path, outdated_ignored_path["regex"]):
if (
file_ignored_until is None
or file_ignored_until < outdated_ignored_path["until"]
Expand Down
4 changes: 2 additions & 2 deletions ggshield/cmd/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

from click import UsageError

from ggshield.utils.files import is_filepath_excluded
from ggshield.utils.files import is_path_excluded


def check_directory_not_ignored(directory: Path, exclusion_regexes: Set[re.Pattern]):
if is_filepath_excluded(directory, exclusion_regexes):
if is_path_excluded(directory.resolve(), exclusion_regexes):
raise UsageError("An ignored file or directory cannot be scanned.")
9 changes: 7 additions & 2 deletions ggshield/core/config/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@
DEFAULT_CONFIG_FILENAME,
USER_CONFIG_FILENAMES,
)
from ggshield.core.dirs import get_config_dir, get_user_home_dir
from ggshield.core.dirs import get_config_dir, get_project_root_dir, get_user_home_dir
from ggshield.core.errors import UnexpectedError
from ggshield.utils.git_shell import GitExecutableNotFound


def replace_in_keys(data: Union[List, Dict], old_char: str, new_char: str) -> None:
Expand Down Expand Up @@ -93,8 +94,12 @@ def find_global_config_path(*, to_write: bool = False) -> Optional[Path]:


def find_local_config_path() -> Optional[Path]:
try:
project_root_dir = get_project_root_dir(Path())
except GitExecutableNotFound:
project_root_dir = Path()
for filename in USER_CONFIG_FILENAMES:
path = Path(filename)
path = project_root_dir / filename
if path.exists():
return path
return None
Expand Down
4 changes: 1 addition & 3 deletions ggshield/core/dirs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ def get_cache_dir() -> Path:


def get_project_root_dir(path: Path) -> Path:
"""Vulnerability path are relative to either git root path or
path provided for the scan.
"""
Returns the source basedir required to find file within filesystem.
"""
try:
Expand Down
4 changes: 2 additions & 2 deletions ggshield/core/scan/commit_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path
from typing import Iterable, List, Optional, Set

from ggshield.utils.files import is_filepath_excluded
from ggshield.utils.files import is_path_excluded
from ggshield.utils.git_shell import Filemode

from .scannable import Scannable
Expand Down Expand Up @@ -213,7 +213,7 @@ def parse_patch(

diffs = re.split(r"^diff ", rest, flags=re.MULTILINE)
for file_info, diff in zip(header.files, diffs):
if is_filepath_excluded(file_info.path, exclusion_regexes):
if is_path_excluded(file_info.path, exclusion_regexes):
continue

# extract document from diff: we must skip diff extended headers
Expand Down
16 changes: 11 additions & 5 deletions ggshield/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,17 @@ def __init__(self, path: Path):
self.path = path


def is_filepath_excluded(
filepath: Union[str, Path], exclusion_regexes: Set[re.Pattern]
def is_path_excluded(
path: Union[str, Path], exclusion_regexes: Set[re.Pattern]
) -> bool:
filepath = Path(filepath)
return any(r.search(str(PurePosixPath(filepath))) for r in exclusion_regexes)
path = Path(path)
if path.is_dir():
# The directory exclusion regexes have to end with a slash
# To check if path is excluded, we need to add a trailing slash
path_string = f"{PurePosixPath(path)}/"
else:
path_string = str(PurePosixPath(path))
return any(r.search(path_string) for r in exclusion_regexes)


def get_filepaths(
Expand Down Expand Up @@ -54,7 +60,7 @@ def get_filepaths(
_targets = path.rglob(r"*")

for file_path in _targets:
if not is_filepath_excluded(file_path, exclusion_regexes):
if not is_path_excluded(file_path, exclusion_regexes):
targets.add(file_path)
return targets

Expand Down
4 changes: 2 additions & 2 deletions ggshield/verticals/sca/file_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from ggshield.core.scan.file import get_files_from_paths
from ggshield.core.tar_utils import INDEX_REF
from ggshield.core.text_utils import display_info
from ggshield.utils.files import is_filepath_excluded
from ggshield.utils.files import is_path_excluded
from ggshield.utils.git_shell import get_filepaths_from_ref, get_staged_filepaths


Expand Down Expand Up @@ -85,7 +85,7 @@ def sca_files_from_git_repo(
files=[
str(path)
for path in all_files
if not is_filepath_excluded(path, exclusion_regexes)
if not is_path_excluded(path, exclusion_regexes)
]
)
if isinstance(sca_files_result, Detail):
Expand Down
56 changes: 50 additions & 6 deletions tests/unit/cmd/iac/test_scan_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from ggshield.__main__ import cli
from ggshield.core.errors import ExitCode
from ggshield.utils.os import cd
from tests.conftest import IAC_SINGLE_VULNERABILITY
from tests.repository import Repository
from tests.unit.conftest import assert_invoke_exited_with, my_vcr
Expand Down Expand Up @@ -244,16 +245,15 @@ def test_iac_scan_all_verbose(cli_fs_runner: CliRunner, cli_command) -> None:

@patch("pygitguardian.client.GGClient.iac_directory_scan")
def test_iac_scan_all_ignored_directory(
iac_directory_scan_mock: Mock, cli_fs_runner: CliRunner, cli_command
iac_directory_scan_mock: Mock, cli_fs_runner: CliRunner, cli_command, tmp_path: Path
) -> None:
"""
GIVEN a directory which is ignored
WHEN running the iac scan all command on this directory
THEN an error is raised
"""
path = Path(".")
repo = Repository.create(path)
iac_file = path / "iac_file.tf"
repo = Repository.create(tmp_path)
iac_file = tmp_path / "iac_file.tf"
iac_file.write_text(IAC_SINGLE_VULNERABILITY)
repo.add(iac_file)
repo.create_commit()
Expand All @@ -265,8 +265,8 @@ def test_iac_scan_all_ignored_directory(
"scan",
"all",
"--ignore-path",
str(path),
str(path),
f"{tmp_path.name}/",
str(tmp_path),
],
)

Expand All @@ -275,6 +275,50 @@ def test_iac_scan_all_ignored_directory(
iac_directory_scan_mock.assert_not_called()


@patch("pygitguardian.client.GGClient.iac_directory_scan")
def test_iac_scan_all_ignored_directory_config(
iac_directory_scan_mock: Mock, cli_fs_runner: CliRunner, cli_command, tmp_path: Path
) -> None:
"""
GIVEN a directory which is ignored in the config
WHEN running the iac scan all command on this directory
THEN an error is raised
"""
repo = Repository.create(tmp_path)
dir = tmp_path / "dir"
dir.mkdir()
subdir = dir / "subdir"
subdir.mkdir()
iac_file = subdir / "iac_file.tf"
iac_file.write_text(IAC_SINGLE_VULNERABILITY)
repo.add(iac_file)
repo.create_commit()

config = """
version: 2
iac:
ignored-paths:
- "dir/subdir/"
"""
(tmp_path / ".gitguardian.yaml").write_text(config)

with cd(str(dir)):
result = cli_fs_runner.invoke(
cli,
[
"iac",
"scan",
"all",
"subdir",
],
)

assert_invoke_exited_with(result, ExitCode.USAGE_ERROR)
assert "An ignored file or directory cannot be scanned." in result.stdout
iac_directory_scan_mock.assert_not_called()


@patch("pygitguardian.GGClient.iac_directory_scan")
def test_iac_scan_all_context_repository(
scan_mock: Mock, tmp_path: Path, cli_fs_runner: CliRunner, cli_command
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/cmd/iac/test_scan_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_iac_scan_diff_ignored_directory(
"--ref",
initial_commit,
"--ignore-path",
str(path),
f"{path.name}/",
str(path),
],
)
Expand Down
53 changes: 50 additions & 3 deletions tests/unit/cmd/sca/test_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def test_sca_text_handler_ordering(patch_scan_all, cli_fs_runner):


@patch("pygitguardian.GGClient.compute_sca_files")
def test_scan_all_ignored_directory(
def test_sca_scan_all_ignored_directory(
compute_sca_files_mock: Mock,
cli_fs_runner: click.testing.CliRunner,
dummy_sca_repo: Repository,
Expand All @@ -404,7 +404,7 @@ def test_scan_all_ignored_directory(
"scan",
"all",
"--ignore-path",
dummy_sca_repo.path.name,
f"{dummy_sca_repo.path.name}/",
str(dummy_sca_repo.path),
],
)
Expand All @@ -414,6 +414,53 @@ def test_scan_all_ignored_directory(
compute_sca_files_mock.assert_not_called()


@patch("pygitguardian.GGClient.compute_sca_files")
def test_sca_scan_all_ignored_directory_config(
compute_sca_files_mock: Mock,
cli_fs_runner: CliRunner,
tmp_path: Path,
pipfile_lock_with_vuln: str,
) -> None:
"""
GIVEN a directory which is ignored in the config
WHEN running the sca scan all command on this directory
THEN an error is raised
"""
repo = Repository.create(tmp_path)
dir = tmp_path / "dir"
dir.mkdir()
subdir = dir / "subdir"
subdir.mkdir()
pipfile_lock = subdir / "Pipfile.lock"
pipfile_lock.write_text(pipfile_lock_with_vuln)
repo.add(pipfile_lock)
repo.create_commit()

config = """
version: 2
sca:
ignored-paths:
- "dir/subdir/"
"""
(tmp_path / ".gitguardian.yaml").write_text(config)

with cd(str(dir)):
result = cli_fs_runner.invoke(
cli,
[
"sca",
"scan",
"all",
"subdir",
],
)

assert_invoke_exited_with(result, ExitCode.USAGE_ERROR)
assert "An ignored file or directory cannot be scanned." in result.stdout
compute_sca_files_mock.assert_not_called()


@patch("pygitguardian.GGClient.compute_sca_files")
def test_sca_scan_diff_ignored_directory(
compute_sca_files_mock: Mock,
Expand All @@ -435,7 +482,7 @@ def test_sca_scan_diff_ignored_directory(
"--ref",
"branch_without_vuln",
"--ignore-path",
dummy_sca_repo.path.name,
f"{dummy_sca_repo.path.name}/",
str(dummy_sca_repo.path),
],
)
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/cmd/scan/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ def test_files_verbose_yes(self, cli_fs_runner):
assert "No secrets have been found" in result.output

@patch("ggshield.verticals.secret.secret_scanner.SecretScanner.scan")
def test_scan_ignored_directory(self, scan_mock, cli_fs_runner):
def test_scan_ignored_file(self, scan_mock, cli_fs_runner):
self.create_files()
config = """
version: 2
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@

from ggshield.core.cache import Cache
from ggshield.core.url_utils import dashboard_to_api_url
from ggshield.utils.git_shell import (
_get_git_path,
_git_rev_parse_absolute,
read_git_file,
)
from tests.conftest import GG_VALID_TOKEN


Expand Down Expand Up @@ -656,3 +661,10 @@ def make_fake_path_inaccessible(fs: FakeFilesystem, path: Union[str, Path]):
# `force_unix_mode` is required for Windows.
# See <https://pytest-pyfakefs.readthedocs.io/en/latest/usage.html#set-file-as-inaccessible-under-windows>
fs.chmod(path, 0o0000, force_unix_mode=True)


@pytest.fixture(autouse=True)
def clear_cache():
_get_git_path.cache_clear()
_git_rev_parse_absolute.cache_clear()
read_git_file.cache_clear()
Loading

0 comments on commit 621f2c2

Please sign in to comment.