# Patch summary (free text before the first "diff --git" header is not part
# of any hunk).
#
# Line structure was restored from a whitespace-collapsed copy of this diff.
# NOTE(review): indentation, column alignment inside lines, the position of
# blank context lines, and the wrapped `toolGetChalkTimeArtifactInfo`
# signature are inferred from the hunk line counts and language syntax --
# confirm against the real files before applying.
#
# What the patch does, file by file:
#   CHANGELOG.md            adds a "Fixes" entry referencing PR #485.
#   externalTool.nim        `toolBase` now calls `resolvePath()` itself and
#                           passes the resolved path to `runOneTool`; new
#                           `getToolPath` asks the "vctl_git" plugin for the
#                           repo of a path and returns the path unchanged on
#                           `KeyError`; the host-info and "build" branches run
#                           on the first context directory via `getToolPath`.
#   vctlGit.nim             adds a `repos` table filled in `findAndLoad`
#                           (path -> parent of the vcs dir) and an exported
#                           `getRepoFor` that raises `KeyError` when the
#                           resolved path is not in that table.
#   run_management.nim      `setContextDirectories` stores the parent
#                           directory for entries that are existing files.
#   hello.sh                new one-line shell fixture.
#   test_plugins.py         `test_semgrep` chalks `hello.sh`; the report is
#                           compared with `report_sast_data`, the chalk mark
#                           with the new `mark_sast_data` (empty `results`).
#   utils/log.py            `Console` renderer subclass whose `_repr` returns
#                           `str` values as-is and `repr(val)` otherwise.
#
# NOTE(review): `len(data) >= 0` (context line in externalTool.nim) is always
#   true; confirm whether `> 0` was intended.
# NOTE(review): the `obj.fsRef` / `obj.name` branches call `toolBase` directly
#   and do not go through `getToolPath`; confirm this matches the CHANGELOG
#   wording about using the nearest git repository.
# NOTE(review): `parentDir()` of a bare relative file name is "" -- confirm
#   callers of `getContextDirectories()` handle an empty entry.
# NOTE(review): the test comment says "sbom" but the assertion checks SAST data.
# NOTE(review): `Any` is used in utils/log.py; its import is outside this diff.
#
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 492a3fb3..7a4efa01 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,14 @@
 
 ## On the `main` branch
 
+### Fixes
+
+- `chalk insert` was running external tools on the exact path
+  being chalked. For example `chalk insert hello.py` would run `semgrep`
+  on `hello.py`. Now chalk will compute nearest `git` repository
+  and run external tools on it instead.
+  ([#485](https://github.com/crashappsec/chalk/pull/485))
+
 ## 0.5.3
 
 **Feb 3, 2025**
diff --git a/src/plugins/externalTool.nim b/src/plugins/externalTool.nim
index 4cc39642..53922cca 100644
--- a/src/plugins/externalTool.nim
+++ b/src/plugins/externalTool.nim
@@ -10,6 +10,7 @@
 
 import std/[algorithm, sequtils, sets]
 import ".."/[config, plugin_api, util]
+import "."/[vctlGit]
 
 type
   AlreadyRanError = object of CatchableError
@@ -75,6 +76,7 @@ proc runOneTool(info: PIInfo, path: string): ChalkDict =
   return d
 
 template toolBase(path: string) {.dirty.} =
+  let resolved = path.resolvePath()
   result = ChalkDict()
 
   var
@@ -104,7 +106,7 @@ template toolBase(path: string) {.dirty.} =
   for k, v in toolInfo:
     for (ignore, info) in v.sorted():
       try:
-        let data = info.runOneTool(path)
+        let data = info.runOneTool(resolved)
         # merge multiple tools into a single structure
         # for example first tool returns:
         # { SBOM: { foo: {...} } }
@@ -116,21 +118,35 @@ template toolBase(path: string) {.dirty.} =
         if len(data) >= 0 and attrGet[bool]("tool." & info.name & ".stop_on_success"):
           break
       except AlreadyRanError:
-        trace(info.name & ": already ran for " & path & ". skipping")
+        trace(info.name & ": already ran for " & resolved & ". skipping")
       except:
         error(info.name & ": " & getCurrentExceptionMsg())
 
+proc getToolPath(path: string): string =
+  let
+    git = getPluginByName("vctl_git")
+  try:
+    return git.getRepoFor(path)
+  except KeyError:
+    return path
+
 proc toolGetChalkTimeHostInfo(self: Plugin): ChalkDict {.cdecl.} =
-  toolBase(resolvePath(getContextDirectories()[0]))
+  for c in getContextDirectories():
+    toolBase(getToolPath(c))
+    # only care about first context
+    break
 
 proc toolGetChalkTimeArtifactInfo(self: Plugin, obj: ChalkObj):
        ChalkDict {.cdecl.} =
   if obj.fsRef != "":
-    toolBase(resolvePath(obj.fsRef))
+    toolBase(obj.fsRef)
   elif getCommandName() == "build":
-    toolBase(resolvePath(getContextDirectories()[0]))
+    for c in getContextDirectories():
+      toolBase(getToolPath(c))
+      # only care about first context
+      break
   else:
-    toolBase(resolvePath(obj.name))
+    toolBase(obj.name)
 
 proc loadExternalTool*() =
   newPlugin("tool",
diff --git a/src/plugins/vctlGit.nim b/src/plugins/vctlGit.nim
index d41aff90..bc560203 100644
--- a/src/plugins/vctlGit.nim
+++ b/src/plugins/vctlGit.nim
@@ -272,6 +272,7 @@ type
     branchName: Option[string]
     commitId: Option[string]
     origin: Option[string]
+    repos: OrderedTable[string, string]
     vcsDirs: OrderedTable[string, RepoInfo]
 
 proc clearCallback(self: Plugin) {.cdecl.} =
@@ -760,6 +761,7 @@ proc findAndLoad(plugin: GitInfo, path: string) =
     dumpExOnDebug()
 
   plugin.vcsDirs[vcsDir] = info
+  plugin.repos[path] = vcsDir.parentDir()
 
 proc setVcsKeys(chalkDict: ChalkDict, info: RepoInfo, prefix = "") =
   if prefix == "":
@@ -803,6 +805,17 @@ proc gitInit(self: Plugin) =
   for path in getContextDirectories():
     cache.findAndLoad(path.resolvePath())
 
+proc getRepoFor*(self: Plugin, path: string): string =
+  self.gitInit()
+  let
+    cache = GitInfo(self.internalState)
+    resolved = path.resolvePath()
+  if resolved in cache.repos:
+    return cache.repos[resolved]
+  else:
+    trace("git: " & path & " is not inside git repo")
+    raise newException(KeyError, "not in git repo")
+
 proc gitFirstDir*(self: Plugin): string =
   self.gitInit()
   let cache = GitInfo(self.internalState)
diff --git a/src/run_management.nim b/src/run_management.nim
index 15971026..c36176ec 100644
--- a/src/run_management.nim
+++ b/src/run_management.nim
@@ -140,7 +140,17 @@ proc addUnmarked*(s: string) =
   collectionCtx.unmarked.add(s)
 proc setContextDirectories*(l: seq[string]) =
   # Used for 'where to look for stuff' plugins, particularly version control.
-  collectionCtx.contextDirectories = l
+  var dirs = newSeq[string]()
+  for i in l:
+    dirs.add(
+      # if its a file, normalize to its parent folder
+      # as the context should be a directory
+      if i.fileExists():
+        i.parentDir()
+      else:
+        i
+    )
+  collectionCtx.contextDirectories = dirs
 proc getContextDirectories*(): seq[string] =
   collectionCtx.contextDirectories
 
diff --git a/tests/functional/data/python/sample_1/hello.sh b/tests/functional/data/python/sample_1/hello.sh
new file mode 100755
index 00000000..1a248525
--- /dev/null
+++ b/tests/functional/data/python/sample_1/hello.sh
@@ -0,0 +1 @@
+#!/bin/sh
diff --git a/tests/functional/test_plugins.py b/tests/functional/test_plugins.py
index f70833a5..0dd862b2 100644
--- a/tests/functional/test_plugins.py
+++ b/tests/functional/test_plugins.py
@@ -916,7 +916,7 @@ def test_semgrep(
     )
 
     # expected sast output with custom rule
-    sast_data = {
+    report_sast_data = {
         "SAST": {
             "semgrep": {
                 "runs": [
@@ -966,17 +966,40 @@ def test_semgrep(
             }
         }
     }
+    mark_sast_data = {
+        "SAST": {
+            "semgrep": {
+                "runs": [
+                    {
+                        "invocations": [
+                            {
+                                "executionSuccessful": True,
+                            }
+                        ],
+                        "results": [],
+                        "tool": {
+                            "driver": {
+                                "name": "Semgrep OSS",
+                                "semanticVersion": ANY,
+                            }
+                        },
+                    }
+                ],
+            }
+        }
+    }
 
     insert = chalk.insert(
-        artifact=tmp_data_dir, env={"EXTERNAL_TOOL_USE_DOCKER": str(use_docker)}
+        artifact=tmp_data_dir / "hello.sh",
+        env={"EXTERNAL_TOOL_USE_DOCKER": str(use_docker)},
     )
-    assert insert.marks_by_path.contains({str(tmp_data_dir / "helloworld.py"): {}})
-    assert insert.report.contains(sast_data)
+    assert insert.marks_by_path.contains({str(tmp_data_dir / "hello.sh"): {}})
+    assert insert.report.contains(report_sast_data)
     if use_docker:
         assert "semgrep/semgrep" in insert.logs
     else:
         assert "semgrep/semgrep" not in insert.logs
 
     # check that sbom has been embedded into the artifact
-    chalk_mark = ChalkMark.from_binary(tmp_data_dir / "helloworld.py")
-    assert chalk_mark.contains(sast_data)
+    chalk_mark = ChalkMark.from_binary(tmp_data_dir / "hello.sh")
+    assert chalk_mark.contains(mark_sast_data)
diff --git a/tests/functional/utils/log.py b/tests/functional/utils/log.py
index b0d2c86e..52799a51 100644
--- a/tests/functional/utils/log.py
+++ b/tests/functional/utils/log.py
@@ -51,7 +51,14 @@ def path_processor(
     return event_dict
 
 
-RENDERER = structlog.dev.ConsoleRenderer()
+class Console(structlog.dev.ConsoleRenderer):
+    def _repr(self, val: Any) -> str:
+        if isinstance(val, str):
+            return val
+        return repr(val)
+
+
+RENDERER = Console()
 LEVEL = (os.environ.get("LOG_LEVEL") or "INFO").upper()
 
 