Skip to content

Commit bc03b57

Browse files
authored
CM-51935 - Fix pre-commit hook for bare repositories (#341)
1 parent d7c17b9 commit bc03b57

File tree

4 files changed

+170
-8
lines changed

4 files changed

+170
-8
lines changed

cycode/cli/apps/scan/commit_range_scanner.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
get_diff_file_content,
2626
get_diff_file_path,
2727
get_pre_commit_modified_documents,
28+
get_safe_head_reference_for_diff,
2829
parse_commit_range_sast,
2930
parse_commit_range_sca,
3031
)
@@ -271,7 +272,9 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
271272

272273
def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None:
273274
progress_bar = ctx.obj['progress_bar']
274-
diff_index = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
275+
repo = git_proxy.get_repo(repo_path)
276+
head_reference = get_safe_head_reference_for_diff(repo)
277+
diff_index = repo.index.diff(head_reference, create_patch=True, R=True)
275278

276279
progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_index))
277280

cycode/cli/consts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@
261261
# git consts
262262
COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D'
263263
GIT_HEAD_COMMIT_REV = 'HEAD'
264+
GIT_EMPTY_TREE_OBJECT = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
264265
EMPTY_COMMIT_SHA = '0000000000000000000000000000000000000000'
265266
GIT_PUSH_OPTION_COUNT_ENV_VAR_NAME = 'GIT_PUSH_OPTION_COUNT'
266267
GIT_PUSH_OPTION_ENV_VAR_PREFIX = 'GIT_PUSH_OPTION_'

cycode/cli/files_collector/commit_range_documents.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,31 @@
2222
logger = get_logger('Commit Range Collector')
2323

2424

25+
def get_safe_head_reference_for_diff(repo: 'Repo') -> str:
26+
"""Get a safe reference to use for diffing against the current HEAD.
27+
In repositories with no commits, HEAD doesn't exist, so we return the empty tree hash.
28+
29+
Args:
30+
repo: Git repository object
31+
32+
Returns:
33+
Either "HEAD" string if commits exist, or empty tree hash if no commits exist
34+
"""
35+
try:
36+
repo.rev_parse(consts.GIT_HEAD_COMMIT_REV)
37+
return consts.GIT_HEAD_COMMIT_REV
38+
except Exception as e: # actually gitdb.exc.BadObject; no import because of lazy loading
39+
logger.debug(
40+
'Repository has no commits, using empty tree hash for diffs, %s',
41+
{'repo_path': repo.working_tree_dir},
42+
exc_info=e,
43+
)
44+
45+
# Repository has no commits, use the universal empty tree hash
46+
# This is the standard Git approach for initial commits
47+
return consts.GIT_EMPTY_TREE_OBJECT
48+
49+
2550
def _does_reach_to_max_commits_to_scan_limit(commit_ids: list[str], max_commits_count: Optional[int]) -> bool:
2651
if max_commits_count is None:
2752
return False
@@ -213,7 +238,8 @@ def get_pre_commit_modified_documents(
213238
diff_documents = []
214239

215240
repo = git_proxy.get_repo(repo_path)
216-
diff_index = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
241+
head_reference = get_safe_head_reference_for_diff(repo)
242+
diff_index = repo.index.diff(head_reference, create_patch=True, R=True)
217243
progress_bar.set_section_length(progress_bar_section, len(diff_index))
218244
for diff in diff_index:
219245
progress_bar.update(progress_bar_section)
@@ -228,9 +254,11 @@ def get_pre_commit_modified_documents(
228254
)
229255
)
230256

231-
file_content = _get_file_content_from_commit_diff(repo, consts.GIT_HEAD_COMMIT_REV, diff)
232-
if file_content:
233-
git_head_documents.append(Document(file_path, file_content))
257+
# Only get file content from HEAD if HEAD exists (not the empty tree hash)
258+
if head_reference == consts.GIT_HEAD_COMMIT_REV:
259+
file_content = _get_file_content_from_commit_diff(repo, head_reference, diff)
260+
if file_content:
261+
git_head_documents.append(Document(file_path, file_content))
234262

235263
if os.path.exists(file_path):
236264
file_content = get_file_content(file_path)
@@ -274,13 +302,13 @@ def parse_commit_range_sast(commit_range: str, path: str) -> tuple[Optional[str]
274302
else:
275303
# Git commands like 'git diff <commit>' compare against HEAD.
276304
from_spec = commit_range
277-
to_spec = 'HEAD'
305+
to_spec = consts.GIT_HEAD_COMMIT_REV
278306

279307
# If a spec is empty (e.g., from '..master'), default it to 'HEAD'
280308
if not from_spec:
281-
from_spec = 'HEAD'
309+
from_spec = consts.GIT_HEAD_COMMIT_REV
282310
if not to_spec:
283-
to_spec = 'HEAD'
311+
to_spec = consts.GIT_HEAD_COMMIT_REV
284312

285313
try:
286314
# Use rev_parse to resolve each specifier to its full commit SHA
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import os
2+
import tempfile
3+
from collections.abc import Generator
4+
from contextlib import contextmanager
5+
6+
from git import Repo
7+
8+
from cycode.cli import consts
9+
from cycode.cli.files_collector.commit_range_documents import get_safe_head_reference_for_diff
10+
11+
12+
@contextmanager
13+
def git_repository(path: str) -> Generator[Repo, None, None]:
14+
"""Context manager for Git repositories that ensures proper cleanup on Windows."""
15+
repo = Repo.init(path)
16+
try:
17+
yield repo
18+
finally:
19+
# Properly close the repository to release file handles
20+
repo.close()
21+
22+
23+
@contextmanager
24+
def temporary_git_repository() -> Generator[tuple[str, Repo], None, None]:
25+
"""Combined context manager for temporary directory with Git repository."""
26+
with tempfile.TemporaryDirectory() as temp_dir, git_repository(temp_dir) as repo:
27+
yield temp_dir, repo
28+
29+
30+
class TestGetSafeHeadReferenceForDiff:
31+
"""Test the safe HEAD reference functionality for git diff operations."""
32+
33+
def test_returns_head_when_repository_has_commits(self) -> None:
34+
"""Test that HEAD is returned when the repository has existing commits."""
35+
with temporary_git_repository() as (temp_dir, repo):
36+
test_file = os.path.join(temp_dir, 'test.py')
37+
with open(test_file, 'w') as f:
38+
f.write("print('test')")
39+
40+
repo.index.add(['test.py'])
41+
repo.index.commit('Initial commit')
42+
43+
result = get_safe_head_reference_for_diff(repo)
44+
assert result == consts.GIT_HEAD_COMMIT_REV
45+
46+
def test_returns_empty_tree_hash_when_repository_has_no_commits(self) -> None:
47+
"""Test that an empty tree hash is returned when the repository has no commits."""
48+
with temporary_git_repository() as (temp_dir, repo):
49+
result = get_safe_head_reference_for_diff(repo)
50+
expected_empty_tree_hash = consts.GIT_EMPTY_TREE_OBJECT
51+
assert result == expected_empty_tree_hash
52+
53+
54+
class TestIndexDiffWithSafeHeadReference:
55+
"""Test that index.diff works correctly with the safe head reference."""
56+
57+
def test_index_diff_works_on_bare_repository(self) -> None:
58+
"""Test that index.diff works on repositories with no commits."""
59+
with temporary_git_repository() as (temp_dir, repo):
60+
test_file = os.path.join(temp_dir, 'staged_file.py')
61+
with open(test_file, 'w') as f:
62+
f.write("print('staged content')")
63+
64+
repo.index.add(['staged_file.py'])
65+
66+
head_ref = get_safe_head_reference_for_diff(repo)
67+
diff_index = repo.index.diff(head_ref, create_patch=True, R=True)
68+
69+
assert len(diff_index) == 1
70+
diff = diff_index[0]
71+
assert diff.b_path == 'staged_file.py'
72+
73+
def test_index_diff_works_on_repository_with_commits(self) -> None:
74+
"""Test that index.diff continues to work on repositories with existing commits."""
75+
with temporary_git_repository() as (temp_dir, repo):
76+
initial_file = os.path.join(temp_dir, 'initial.py')
77+
with open(initial_file, 'w') as f:
78+
f.write("print('initial')")
79+
80+
repo.index.add(['initial.py'])
81+
repo.index.commit('Initial commit')
82+
83+
new_file = os.path.join(temp_dir, 'new_file.py')
84+
with open(new_file, 'w') as f:
85+
f.write("print('new file')")
86+
87+
with open(initial_file, 'w') as f:
88+
f.write("print('modified initial')")
89+
90+
repo.index.add(['new_file.py', 'initial.py'])
91+
92+
head_ref = get_safe_head_reference_for_diff(repo)
93+
diff_index = repo.index.diff(head_ref, create_patch=True, R=True)
94+
95+
assert len(diff_index) == 2
96+
file_paths = {diff.b_path or diff.a_path for diff in diff_index}
97+
assert 'new_file.py' in file_paths
98+
assert 'initial.py' in file_paths
99+
assert head_ref == consts.GIT_HEAD_COMMIT_REV
100+
101+
def test_sequential_operations_on_same_repository(self) -> None:
102+
"""Test behavior when transitioning from bare to committed repository."""
103+
with temporary_git_repository() as (temp_dir, repo):
104+
test_file = os.path.join(temp_dir, 'test.py')
105+
with open(test_file, 'w') as f:
106+
f.write("print('test')")
107+
108+
repo.index.add(['test.py'])
109+
110+
head_ref_before = get_safe_head_reference_for_diff(repo)
111+
diff_before = repo.index.diff(head_ref_before, create_patch=True, R=True)
112+
113+
expected_empty_tree = consts.GIT_EMPTY_TREE_OBJECT
114+
assert head_ref_before == expected_empty_tree
115+
assert len(diff_before) == 1
116+
117+
repo.index.commit('First commit')
118+
119+
new_file = os.path.join(temp_dir, 'new.py')
120+
with open(new_file, 'w') as f:
121+
f.write("print('new')")
122+
123+
repo.index.add(['new.py'])
124+
125+
head_ref_after = get_safe_head_reference_for_diff(repo)
126+
diff_after = repo.index.diff(head_ref_after, create_patch=True, R=True)
127+
128+
assert head_ref_after == consts.GIT_HEAD_COMMIT_REV
129+
assert len(diff_after) == 1
130+
assert diff_after[0].b_path == 'new.py'

0 commit comments

Comments
 (0)