Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
<!--
A new scriv changelog fragment.

Uncomment the section that is right (remove the HTML comment wrapper).
For top level release notes, leave all the headers commented out.
-->

<!--
### Removed

- A bullet item for the Removed category.

-->
<!--
### Added

- A bullet item for the Added category.

-->
<!--
### Changed

- A bullet item for the Changed category.

-->
<!--
### Deprecated

- A bullet item for the Deprecated category.

-->

### Fixed

- Skip non-seekable files instead of crashing.

<!--
### Security

- A bullet item for the Security category.

-->
3 changes: 2 additions & 1 deletion ggshield/core/scan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
from .file import File, create_files_from_paths
from .scan_context import ScanContext
from .scan_mode import ScanMode
from .scannable import DecodeError, Scannable, StringScannable
from .scannable import DecodeError, NonSeekableFileError, Scannable, StringScannable


__all__ = [
"create_files_from_paths",
"Commit",
"DecodeError",
"File",
"NonSeekableFileError",
"ScanContext",
"ScanMode",
"Scannable",
Expand Down
15 changes: 12 additions & 3 deletions ggshield/core/scan/scannable.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,12 @@ class DecodeError(Exception):
pass


class NonSeekableFileError(Exception):
"""Raised when a file cannot be seeked"""

pass


class Scannable(ABC):
"""Base class for content that can be scanned by GGShield"""

Expand Down Expand Up @@ -143,8 +149,12 @@ def _is_file_longer_than(
Raises DecodeError if the file cannot be decoded.
"""
# Get the byte size
assert fp.seekable()
byte_size = fp.seek(0, SEEK_END)
# Note: IOBase.seekable() returns True on some non-seekable files like /proc/self/mounts
try:
byte_size = fp.seek(0, SEEK_END)
fp.seek(0, SEEK_SET)
except OSError as exc:
raise NonSeekableFileError() from exc

if byte_size > max_utf8_encoded_size * UTF8_TO_WORSE_OTHER_ENCODING_RATIO:
# Even if the file used the worst encoding (UTF-32), encoding the content of
Expand All @@ -153,7 +163,6 @@ def _is_file_longer_than(
return True, None, None

# Determine the encoding
fp.seek(0, SEEK_SET)
charset_matches = charset_normalizer.from_fp(fp)
charset_match = charset_matches.best()
if charset_match is None:
Expand Down
4 changes: 4 additions & 0 deletions ggshield/verticals/secret/secret_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ggshield.core.constants import MAX_WORKERS
from ggshield.core.errors import handle_api_error
from ggshield.core.scan import DecodeError, ScanContext, Scannable
from ggshield.core.scan.scannable import NonSeekableFileError
from ggshield.core.scanner_ui.scanner_ui import ScannerUI
from ggshield.core.text_utils import pluralize

Expand Down Expand Up @@ -157,6 +158,9 @@ def _start_scans(
except DecodeError:
scanner_ui.on_skipped(scannable, "can't detect encoding")
continue
except NonSeekableFileError:
scanner_ui.on_skipped(scannable, "file cannot be seeked")
continue

if content:
if (
Expand Down
24 changes: 23 additions & 1 deletion tests/unit/core/scan/test_scannable.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from pathlib import Path
from unittest.mock import patch

import pytest

from ggshield.core.scan import StringScannable
from ggshield.core.scan import File, StringScannable
from ggshield.core.scan.scannable import NonSeekableFileError


def test_string_scannable_path():
Expand Down Expand Up @@ -32,3 +34,23 @@ def test_string_scannable_is_longer_than(content, is_longer):
"""
scannable = StringScannable(content=content, url="u")
assert scannable.is_longer_than(50) == is_longer


@patch("pathlib.Path.open")
def test_file_non_seekable(mock_open, tmp_path):
"""
GIVEN a File instance
AND the file reports as seekable but seeking operations fail
WHEN is_longer_than() is called on it
THEN it raises NonSeekableFileError
"""
mock_file = mock_open.return_value.__enter__.return_value
mock_file.seekable.return_value = True
mock_file.seek.side_effect = OSError(22, "Invalid argument")

test_file = tmp_path / "test.txt"
test_file.write_text("test content")
file_obj = File(test_file)

with pytest.raises(NonSeekableFileError):
file_obj.is_longer_than(1000)
Loading