Skip to content

Commit

Permalink
fix: run external tools on git repo folder
Browse files Browse the repository at this point in the history
When running external tools for `chalk insert`, they should run on the git
repo, not on the path of the chalked artifact. For example:

```
chalk insert hello.py
```

Should run `semgrep` on the git repo, not `hello.py`.
  • Loading branch information
miki725 committed Feb 10, 2025
1 parent 64c1976 commit 245d85a
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 14 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

## On the `main` branch

### Fixes

- `chalk insert` was running external tools on the exact path
being chalked. For example, `chalk insert hello.py` would run `semgrep`
on `hello.py`. Now chalk finds the nearest `git` repository
and runs external tools on it instead.
([#485](https://github.com/crashappsec/chalk/pull/485))

## 0.5.3

**Feb 3, 2025**
Expand Down
28 changes: 22 additions & 6 deletions src/plugins/externalTool.nim
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import std/[algorithm, sequtils, sets]
import ".."/[config, plugin_api, util]
import "."/[vctlGit]

type
AlreadyRanError = object of CatchableError
Expand Down Expand Up @@ -75,6 +76,7 @@ proc runOneTool(info: PIInfo, path: string): ChalkDict =
return d

template toolBase(path: string) {.dirty.} =
let resolved = path.resolvePath()
result = ChalkDict()

var
Expand Down Expand Up @@ -104,7 +106,7 @@ template toolBase(path: string) {.dirty.} =
for k, v in toolInfo:
for (ignore, info) in v.sorted():
try:
let data = info.runOneTool(path)
let data = info.runOneTool(resolved)
# merge multiple tools into a single structure
# for example first tool returns:
# { SBOM: { foo: {...} } }
Expand All @@ -116,21 +118,35 @@ template toolBase(path: string) {.dirty.} =
if len(data) >= 0 and attrGet[bool]("tool." & info.name & ".stop_on_success"):
break
except AlreadyRanError:
trace(info.name & ": already ran for " & path & ". skipping")
trace(info.name & ": already ran for " & resolved & ". skipping")
except:
error(info.name & ": " & getCurrentExceptionMsg())

proc getToolPath(path: string): string =
  ## Picks the path external tools should be run against for `path`.
  ##
  ## Asks the `vctl_git` plugin (via `getRepoFor`) for the repo associated
  ## with `path`. When that lookup raises `KeyError`, `path` itself is
  ## returned unchanged.
  let gitPlugin = getPluginByName("vctl_git")
  result =
    try:
      gitPlugin.getRepoFor(path)
    except KeyError:
      # no repo known for this path; fall back to the path as given
      path

proc toolGetChalkTimeHostInfo(self: Plugin): ChalkDict {.cdecl.} =
toolBase(resolvePath(getContextDirectories()[0]))
for c in getContextDirectories():
toolBase(getToolPath(c))
# only care about first context
break

proc toolGetChalkTimeArtifactInfo(self: Plugin, obj: ChalkObj):
ChalkDict {.cdecl.} =
if obj.fsRef != "":
toolBase(resolvePath(obj.fsRef))
toolBase(obj.fsRef)
elif getCommandName() == "build":
toolBase(resolvePath(getContextDirectories()[0]))
for c in getContextDirectories():
toolBase(getToolPath(c))
# only care about first context
break
else:
toolBase(resolvePath(obj.name))
toolBase(obj.name)

proc loadExternalTool*() =
newPlugin("tool",
Expand Down
13 changes: 13 additions & 0 deletions src/plugins/vctlGit.nim
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ type
branchName: Option[string]
commitId: Option[string]
origin: Option[string]
repos: OrderedTable[string, string]
vcsDirs: OrderedTable[string, RepoInfo]

proc clearCallback(self: Plugin) {.cdecl.} =
Expand Down Expand Up @@ -760,6 +761,7 @@ proc findAndLoad(plugin: GitInfo, path: string) =
dumpExOnDebug()

plugin.vcsDirs[vcsDir] = info
plugin.repos[path] = vcsDir.parentDir()

proc setVcsKeys(chalkDict: ChalkDict, info: RepoInfo, prefix = "") =
if prefix == "":
Expand Down Expand Up @@ -803,6 +805,17 @@ proc gitInit(self: Plugin) =
for path in getContextDirectories():
cache.findAndLoad(path.resolvePath())

proc getRepoFor*(self: Plugin, path: string): string =
  ## Returns the entry stored in the git plugin's `repos` table for `path`.
  ##
  ## `path` is resolved with `resolvePath()` before the lookup, and the
  ## lookup is an exact key match against `repos`.
  ## NOTE(review): `repos` appears to be filled by `findAndLoad` with the
  ## parent directory of the discovered VCS dir - confirm against the
  ## full file.
  ##
  ## Raises `KeyError` when the resolved path has no entry in `repos`.
  self.gitInit()
  let
    state = GitInfo(self.internalState)
    absPath = path.resolvePath()
  if absPath notin state.repos:
    trace("git: " & path & " is not inside git repo")
    raise newException(KeyError, "not in git repo")
  result = state.repos[absPath]

proc gitFirstDir*(self: Plugin): string =
self.gitInit()
let cache = GitInfo(self.internalState)
Expand Down
12 changes: 11 additions & 1 deletion src/run_management.nim
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,17 @@ proc addUnmarked*(s: string) =
collectionCtx.unmarked.add(s)
proc setContextDirectories*(l: seq[string]) =
# Used for 'where to look for stuff' plugins, particularly version control.
collectionCtx.contextDirectories = l
var dirs = newSeq[string]()
for i in l:
dirs.add(
# if it's a file, normalize to its parent folder
# as the context should be a directory
if i.fileExists():
i.parentDir()
else:
i
)
collectionCtx.contextDirectories = dirs
proc getContextDirectories*(): seq[string] =
collectionCtx.contextDirectories

Expand Down
1 change: 1 addition & 0 deletions tests/functional/data/python/sample_1/hello.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#!/bin/sh
35 changes: 29 additions & 6 deletions tests/functional/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,7 +916,7 @@ def test_semgrep(
)

# expected sast output with custom rule
sast_data = {
report_sast_data = {
"SAST": {
"semgrep": {
"runs": [
Expand Down Expand Up @@ -966,17 +966,40 @@ def test_semgrep(
}
}
}
mark_sast_data = {
"SAST": {
"semgrep": {
"runs": [
{
"invocations": [
{
"executionSuccessful": True,
}
],
"results": [],
"tool": {
"driver": {
"name": "Semgrep OSS",
"semanticVersion": ANY,
}
},
}
],
}
}
}

insert = chalk.insert(
artifact=tmp_data_dir, env={"EXTERNAL_TOOL_USE_DOCKER": str(use_docker)}
artifact=tmp_data_dir / "hello.sh",
env={"EXTERNAL_TOOL_USE_DOCKER": str(use_docker)},
)
assert insert.marks_by_path.contains({str(tmp_data_dir / "helloworld.py"): {}})
assert insert.report.contains(sast_data)
assert insert.marks_by_path.contains({str(tmp_data_dir / "hello.sh"): {}})
assert insert.report.contains(report_sast_data)
if use_docker:
assert "semgrep/semgrep" in insert.logs
else:
assert "semgrep/semgrep" not in insert.logs

# check that SAST data has been embedded into the artifact
chalk_mark = ChalkMark.from_binary(tmp_data_dir / "helloworld.py")
assert chalk_mark.contains(sast_data)
chalk_mark = ChalkMark.from_binary(tmp_data_dir / "hello.sh")
assert chalk_mark.contains(mark_sast_data)
9 changes: 8 additions & 1 deletion tests/functional/utils/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,14 @@ def path_processor(
return event_dict


RENDERER = structlog.dev.ConsoleRenderer()
class Console(structlog.dev.ConsoleRenderer):
    """Console renderer whose ``_repr`` passes strings through unchanged.

    Any non-string value is still rendered with the builtin ``repr()``.
    """

    def _repr(self, val: Any) -> str:
        # strings are returned as-is (no added quotes or escaping from repr)
        return val if isinstance(val, str) else repr(val)


RENDERER = Console()

LEVEL = (os.environ.get("LOG_LEVEL") or "INFO").upper()

Expand Down

0 comments on commit 245d85a

Please sign in to comment.