Skip to content

Commit 1e6c67d

Browse files
committed
CM-51935 - Fix pre-commit hook for bare repositories
1 parent d7c17b9 commit 1e6c67d

File tree

4 files changed

+160
-8
lines changed

4 files changed

+160
-8
lines changed

cycode/cli/apps/scan/commit_range_scanner.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
get_diff_file_content,
2626
get_diff_file_path,
2727
get_pre_commit_modified_documents,
28+
get_safe_head_reference_for_diff,
2829
parse_commit_range_sast,
2930
parse_commit_range_sca,
3031
)
@@ -271,7 +272,9 @@ def _scan_sca_pre_commit(ctx: typer.Context, repo_path: str) -> None:
271272

272273
def _scan_secret_pre_commit(ctx: typer.Context, repo_path: str) -> None:
273274
progress_bar = ctx.obj['progress_bar']
274-
diff_index = git_proxy.get_repo(repo_path).index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
275+
repo = git_proxy.get_repo(repo_path)
276+
head_reference = get_safe_head_reference_for_diff(repo)
277+
diff_index = repo.index.diff(head_reference, create_patch=True, R=True)
275278

276279
progress_bar.set_section_length(ScanProgressBarSection.PREPARE_LOCAL_FILES, len(diff_index))
277280

cycode/cli/consts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@
261261
# git consts
262262
COMMIT_DIFF_DELETED_FILE_CHANGE_TYPE = 'D'
263263
GIT_HEAD_COMMIT_REV = 'HEAD'
264+
GIT_EMPTY_TREE_OBJECT = '4b825dc642cb6eb9a060e54bf8d69288fbee4904'
264265
EMPTY_COMMIT_SHA = '0000000000000000000000000000000000000000'
265266
GIT_PUSH_OPTION_COUNT_ENV_VAR_NAME = 'GIT_PUSH_OPTION_COUNT'
266267
GIT_PUSH_OPTION_ENV_VAR_PREFIX = 'GIT_PUSH_OPTION_'

cycode/cli/files_collector/commit_range_documents.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,31 @@
2222
logger = get_logger('Commit Range Collector')
2323

2424

25+
def get_safe_head_reference_for_diff(repo: 'Repo') -> str:
    """Pick the revision that the index should be diffed against.

    The function tries to resolve ``HEAD`` through ``repo.rev_parse``. When that
    succeeds, the ``HEAD`` revision name is returned. When it raises, the
    repository is treated as having no commits yet and the hash of Git's empty
    tree object is returned instead, so that a diff can still be computed.

    Args:
        repo: Git repository object

    Returns:
        ``consts.GIT_HEAD_COMMIT_REV`` if ``HEAD`` resolves, otherwise
        ``consts.GIT_EMPTY_TREE_OBJECT``
    """
    head_revision = consts.GIT_HEAD_COMMIT_REV

    try:
        repo.rev_parse(head_revision)
    except Exception as error:  # expected to be gitdb.exc.BadObject; not imported because of lazy loading
        logger.debug(
            'Repository has no commits, using empty tree hash for diffs, %s',
            {'repo_path': repo.working_tree_dir},
            exc_info=error,
        )
        # HEAD could not be resolved: fall back to the universal empty tree hash,
        # which is the standard Git approach for diffing an initial commit.
        return consts.GIT_EMPTY_TREE_OBJECT

    return head_revision
48+
49+
2550
def _does_reach_to_max_commits_to_scan_limit(commit_ids: list[str], max_commits_count: Optional[int]) -> bool:
2651
if max_commits_count is None:
2752
return False
@@ -213,7 +238,8 @@ def get_pre_commit_modified_documents(
213238
diff_documents = []
214239

215240
repo = git_proxy.get_repo(repo_path)
216-
diff_index = repo.index.diff(consts.GIT_HEAD_COMMIT_REV, create_patch=True, R=True)
241+
head_reference = get_safe_head_reference_for_diff(repo)
242+
diff_index = repo.index.diff(head_reference, create_patch=True, R=True)
217243
progress_bar.set_section_length(progress_bar_section, len(diff_index))
218244
for diff in diff_index:
219245
progress_bar.update(progress_bar_section)
@@ -228,9 +254,11 @@ def get_pre_commit_modified_documents(
228254
)
229255
)
230256

231-
file_content = _get_file_content_from_commit_diff(repo, consts.GIT_HEAD_COMMIT_REV, diff)
232-
if file_content:
233-
git_head_documents.append(Document(file_path, file_content))
257+
# Only get file content from HEAD if HEAD exists (not the empty tree hash)
258+
if head_reference == consts.GIT_HEAD_COMMIT_REV:
259+
file_content = _get_file_content_from_commit_diff(repo, head_reference, diff)
260+
if file_content:
261+
git_head_documents.append(Document(file_path, file_content))
234262

235263
if os.path.exists(file_path):
236264
file_content = get_file_content(file_path)
@@ -274,13 +302,13 @@ def parse_commit_range_sast(commit_range: str, path: str) -> tuple[Optional[str]
274302
else:
275303
# Git commands like 'git diff <commit>' compare against HEAD.
276304
from_spec = commit_range
277-
to_spec = 'HEAD'
305+
to_spec = consts.GIT_HEAD_COMMIT_REV
278306

279307
# If a spec is empty (e.g., from '..master'), default it to 'HEAD'
280308
if not from_spec:
281-
from_spec = 'HEAD'
309+
from_spec = consts.GIT_HEAD_COMMIT_REV
282310
if not to_spec:
283-
to_spec = 'HEAD'
311+
to_spec = consts.GIT_HEAD_COMMIT_REV
284312

285313
try:
286314
# Use rev_parse to resolve each specifier to its full commit SHA
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
import os
2+
import tempfile
3+
4+
from git import Repo
5+
6+
from cycode.cli import consts
7+
from cycode.cli.files_collector.commit_range_documents import get_safe_head_reference_for_diff
8+
9+
10+
class TestGetSafeHeadReferenceForDiff:
    """Checks which reference get_safe_head_reference_for_diff returns for a repository."""

    def test_returns_head_when_repository_has_commits(self) -> None:
        """A repository with one commit must yield the HEAD revision name."""
        with tempfile.TemporaryDirectory() as workdir:
            repository = Repo.init(workdir)

            # Create and commit a single file so that HEAD resolves.
            with open(os.path.join(workdir, 'test.py'), 'w') as handle:
                handle.write("print('test')")

            repository.index.add(['test.py'])
            repository.index.commit('Initial commit')

            assert get_safe_head_reference_for_diff(repository) == consts.GIT_HEAD_COMMIT_REV

    def test_returns_empty_tree_hash_when_repository_has_no_commits(self) -> None:
        """A freshly initialised repository (no commits) must yield the empty tree hash."""
        with tempfile.TemporaryDirectory() as workdir:
            repository = Repo.init(workdir)

            reference = get_safe_head_reference_for_diff(repository)
            assert reference == consts.GIT_EMPTY_TREE_OBJECT
36+
37+
38+
class TestIndexDiffWithSafeHeadReference:
    """Checks that index.diff accepts the reference returned by get_safe_head_reference_for_diff."""

    @staticmethod
    def _write_file(directory: str, name: str, content: str) -> None:
        """Create or overwrite ``name`` inside ``directory`` with ``content``."""
        with open(os.path.join(directory, name), 'w') as handle:
            handle.write(content)

    def test_index_diff_works_on_bare_repository(self) -> None:
        """A staged file in a repository with no commits shows up as one diff entry."""
        with tempfile.TemporaryDirectory() as workdir:
            repository = Repo.init(workdir)

            self._write_file(workdir, 'staged_file.py', "print('staged content')")
            repository.index.add(['staged_file.py'])

            reference = get_safe_head_reference_for_diff(repository)
            staged_changes = repository.index.diff(reference, create_patch=True, R=True)

            assert len(staged_changes) == 1
            assert staged_changes[0].b_path == 'staged_file.py'

    def test_index_diff_works_on_repository_with_commits(self) -> None:
        """With an existing commit, a new file and a modified file are both reported against HEAD."""
        with tempfile.TemporaryDirectory() as workdir:
            repository = Repo.init(workdir)

            # Baseline commit containing a single file.
            self._write_file(workdir, 'initial.py', "print('initial')")
            repository.index.add(['initial.py'])
            repository.index.commit('Initial commit')

            # Stage one brand-new file and one modification of the committed file.
            self._write_file(workdir, 'new_file.py', "print('new file')")
            self._write_file(workdir, 'initial.py', "print('modified initial')")
            repository.index.add(['new_file.py', 'initial.py'])

            reference = get_safe_head_reference_for_diff(repository)
            staged_changes = repository.index.diff(reference, create_patch=True, R=True)

            assert len(staged_changes) == 2
            changed_paths = {entry.b_path or entry.a_path for entry in staged_changes}
            assert 'new_file.py' in changed_paths
            assert 'initial.py' in changed_paths
            assert reference == consts.GIT_HEAD_COMMIT_REV

    def test_sequential_operations_on_same_repository(self) -> None:
        """The reference switches from the empty tree hash to HEAD once the first commit exists."""
        with tempfile.TemporaryDirectory() as workdir:
            repository = Repo.init(workdir)

            # Phase 1: nothing committed yet, one file staged.
            self._write_file(workdir, 'test.py', "print('test')")
            repository.index.add(['test.py'])

            reference_before = get_safe_head_reference_for_diff(repository)
            changes_before = repository.index.diff(reference_before, create_patch=True, R=True)

            assert reference_before == consts.GIT_EMPTY_TREE_OBJECT
            assert len(changes_before) == 1

            # Phase 2: commit the staged file, then stage another one.
            repository.index.commit('First commit')

            self._write_file(workdir, 'new.py', "print('new')")
            repository.index.add(['new.py'])

            reference_after = get_safe_head_reference_for_diff(repository)
            changes_after = repository.index.diff(reference_after, create_patch=True, R=True)

            assert reference_after == consts.GIT_HEAD_COMMIT_REV
            assert len(changes_after) == 1
            assert changes_after[0].b_path == 'new.py'

0 commit comments

Comments
 (0)