From 217425e9adeb0fc11268e292562aab26c0128fcd Mon Sep 17 00:00:00 2001 From: Miroslav Shubernetskiy Date: Wed, 19 Feb 2025 15:37:40 -0500 Subject: [PATCH] feat: trufflehog external tool (#489) * feat: trufflehog external tool * adding secret_scanner config module --- CHANGELOG.md | 3 + configs/co/secret_scanner.c4m | 7 + src/chalk_common.nim | 2 + src/con4mfuncs.nim | 22 ++- src/configs/base_keyspecs.c4m | 31 +++- src/configs/base_report_templates.c4m | 6 + src/configs/chalk.c42spec | 15 +- src/configs/crashoverride.c4m | 1 + src/configs/sastconfig.c4m | 2 +- src/configs/sbomconfig.c4m | 2 +- src/configs/secretscannerconfig.c4m | 173 ++++++++++++++++++ src/confload.nim | 1 + src/normalize.nim | 25 ++- src/plugins/externalTool.nim | 12 +- src/selfextract.nim | 1 + tests/functional/conf.py | 14 ++ .../valid/secret_scanner/enable_secrets.c4m | 19 ++ tests/functional/test_plugins.py | 59 +++++- tests/functional/test_sink.py | 18 +- 19 files changed, 381 insertions(+), 32 deletions(-) create mode 100644 configs/co/secret_scanner.c4m create mode 100644 src/configs/secretscannerconfig.c4m create mode 100644 tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m diff --git a/CHANGELOG.md b/CHANGELOG.md index f5043dab..6d306813 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,9 @@ - `EXTERNAL_TOOL_DURATION` key which reports external tool duration for each invocation. ([#488](https://github.com/crashappsec/chalk/pull/488)) +- `run_secret_scanner_tools` configuration which then collects new + `SECRET_SCANNER` key. Currently only trufflehog is supported. 
+ ([#489](https://github.com/crashappsec/chalk/pull/489)) ## 0.5.3 diff --git a/configs/co/secret_scanner.c4m b/configs/co/secret_scanner.c4m new file mode 100644 index 00000000..607c9a93 --- /dev/null +++ b/configs/co/secret_scanner.c4m @@ -0,0 +1,7 @@ +parameter var collect_secret_scanner { + default: true + shortdoc: "Collect secret scanner via trufflehog" + doc: "Whether secret scanner results should be collected for chalking operations via trufflehog" +} + +~run_secret_scanner_tools = collect_secret_scanner diff --git a/src/chalk_common.nim b/src/chalk_common.nim index ea4c8926..9a23f8a4 100644 --- a/src/chalk_common.nim +++ b/src/chalk_common.nim @@ -432,6 +432,7 @@ const baseConfName* = "configs/base_*.c4m" sbomConfName* = "configs/sbomconfig.c4m" sastConfName* = "configs/sastconfig.c4m" + secretsConfName* = "configs/secretscannerconfig.c4m" techStackConfName* = "configs/techstackconfig.c4m" linguistConfName* = "configs/linguist.c4m" ioConfName* = "configs/ioconfig.c4m" @@ -454,6 +455,7 @@ const staticRead("configs/buildkitcmd.c4m") sbomConfig* = staticRead(sbomConfName) sastConfig* = staticRead(sastConfName) + secretsConfig* = staticRead(secretsConfName) techStackConfig* = staticRead(techStackConfName) linguistConfig* = staticRead(linguistConfName) ioConfig* = staticRead(ioConfName) diff --git a/src/con4mfuncs.nim b/src/con4mfuncs.nim index 5f5f9dd0..9d7c6d85 100644 --- a/src/con4mfuncs.nim +++ b/src/con4mfuncs.nim @@ -123,6 +123,19 @@ proc c4mParseJson(args: seq[Box], unused = ConfigState(nil)): Option[Box] = error("Could not parse JSON: " & getCurrentExceptionMsg()) return none(Box) +proc c4mParseJsonL(args: seq[Box], unused = ConfigState(nil)): Option[Box] = + let + data = unpack[string](args[0]) + try: + var json = newJArray() + for line in data.strip().splitLines(): + json.add(parseJson(line)) + let box = nimJsonToBox(json) + return some(box) + except: + error("Could not parse JSON: " & getCurrentExceptionMsg()) + return none(Box) + proc 
dockerExe(args: seq[Box], unused = ConfigState(nil)): Option[Box] = return some(pack(getDockerExeLocation())) @@ -221,8 +234,13 @@ This way the function is only computed once. ("parse_json(string) -> `x", BuiltInFn(c4mParseJson), """ -Same as `url_post()`, but takes a certificate file location in the final -parameter, with which HTTPS connections must authenticate against. +Parses JSON string and returns data-struct back. +""", + @["parsing"]), + ("parse_jsonl(string) -> `x", + BuiltInFn(c4mParseJsonL), + """ +Parses JSONl string and returns data-struct back. """, @["parsing"]), ("docker_exe() -> string", diff --git a/src/configs/base_keyspecs.c4m b/src/configs/base_keyspecs.c4m index 5cd56bcc..ad32b7ef 100644 --- a/src/configs/base_keyspecs.c4m +++ b/src/configs/base_keyspecs.c4m @@ -1480,7 +1480,7 @@ keyspec SBOM { since: "0.1.0" shortdoc: "SBOM(s) collected at Chalk time" doc: """ -This field is meant to captures any SBOMs associated with a chalking +This field is meant to capture any SBOMs associated with a chalking (i.e., a chalk mark insertion operation). The value, when provided, is a dictionary. The keys of that dictionary indicate the tool used to perform the chalking, and the value consists of a free-form JSON @@ -1499,6 +1499,35 @@ collecting this information. """ } +keyspec SECRET_SCANNER { + kind: ChalkTimeArtifact + type: dict[string, dict[string, `x]] + standard: true + since: "0.5.4" + shortdoc: "Secret scanner results collected at Chalk time" + doc: """ +This field is meant to capture any secret scanning tool results while +chalking artifacts. The value, when provided, is a dictionary. +The keys of that dictionary indicate the tool used, and the value consists of a +free-form JSON object returned by the scanning tool. + +Currently only these tools are supported: + +* trufflehog. + If the chalking is done in a git repo, trufflehog is used in `git` mode + which will only scan files checked in the repo. 
+ Otherwise trufflehog is used in `filesystem` mode where it will scan + all files within the chalking context directory. + **NOTE** that trufflehog validates all findings by checking + secrets it finds. For example for AWS keys, it will check if they + are still active creds in AWS. This reduces noise however might have + security-considerations depending on the security posture of the org. + +This is not enabled by default and `run_secret_scanner_tools` config needs +to be enabled to run secret scanners. +""" +} + keyspec EXTERNAL_TOOL_DURATION { kind: ChalkTimeArtifact type: dict[string, dict[string, int]] diff --git a/src/configs/base_report_templates.c4m b/src/configs/base_report_templates.c4m index 16019518..b18ce679 100644 --- a/src/configs/base_report_templates.c4m +++ b/src/configs/base_report_templates.c4m @@ -107,6 +107,7 @@ report and subtract from it. key.EMBEDDED_TMPDIR.use = true key.CLOUD_METADATA_WHEN_CHALKED.use = true key.EXTERNAL_TOOL_DURATION.use = true + key.SECRET_SCANNER.use = true key.SBOM.use = true key.SAST.use = true key.ERR_INFO.use = true @@ -505,6 +506,7 @@ doc: """ key.INJECTOR_ENV.use = true key.TENANT_ID_WHEN_CHALKED.use = true key.EXTERNAL_TOOL_DURATION.use = true + key.SECRET_SCANNER.use = true key.SBOM.use = true key.SAST.use = true key._ACTION_ID.use = true @@ -711,6 +713,7 @@ doc: """ key.EMBEDDED_TMPDIR.use = true key.CLOUD_METADATA_WHEN_CHALKED.use = true key.EXTERNAL_TOOL_DURATION.use = true + key.SECRET_SCANNER.use = true key.SBOM.use = true key.SAST.use = true key.ERR_INFO.use = true @@ -1015,6 +1018,7 @@ container. key.INJECTOR_ENV.use = true key.TENANT_ID_WHEN_CHALKED.use = true key.EXTERNAL_TOOL_DURATION.use = true + key.SECRET_SCANNER.use = true key.SBOM.use = true key.SAST.use = true @@ -1198,6 +1202,7 @@ container. 
key.EMBEDDED_TMPDIR.use = true key.CLOUD_METADATA_WHEN_CHALKED.use = true key.EXTERNAL_TOOL_DURATION.use = true + key.SECRET_SCANNER.use = true key.SBOM.use = true key.SAST.use = true key.ERR_INFO.use = true @@ -1684,6 +1689,7 @@ and keep the run-time key. key.EMBEDDED_TMPDIR.use = true key.CLOUD_METADATA_WHEN_CHALKED.use = true key.EXTERNAL_TOOL_DURATION.use = true + key.SECRET_SCANNER.use = true key.SBOM.use = true key.SAST.use = true key.ERR_INFO.use = true diff --git a/src/configs/chalk.c42spec b/src/configs/chalk.c42spec index 4838447b..155f93f6 100644 --- a/src/configs/chalk.c42spec +++ b/src/configs/chalk.c42spec @@ -21,7 +21,7 @@ all_cmds_that_insert := ["insert", "build", "push", "load", "setup"] # Beyond valid chalk commands, these can generate reports. other_report_ops := ["build", "push", "heartbeat"] -tool_types := ["sbom", "sast"] +tool_types := ["sbom", "sast", "secret_scanner"] valid_log_levels := ["verbose", "trace", "info", "warn", "error", "none"] key_types := ["Chalk-time Host", "Chalk-time Artifact", "Run-time Artifact", "Run-time Host"] @@ -3080,7 +3080,18 @@ The syft command line arguments used at invocation (minus the target location) c doc: """ When true, this will cause chalk to run any configured static analysis security testing (SAST) tools. This is off by default, since it could add a noticeable delay to build time for large code bases. -Currently, the only available tool out of the box is semgrep, and will only work on machines that either already have semgrep installed, or have Python3 installed. +Currently, the only available tool out of the box is semgrep, and will only work on machines that either already have semgrep installed, have Python3 installed, or can run docker. +""" + } + + field run_secret_scanner_tools { + type: bool + default: false + shortdoc: "Run any configured secret scanner tools" + doc: """ +When true, this will cause chalk to run any configured secret scanner tools. This is off by default. 
+ +Currently, the only available tool out of the box is trufflehog. """ } diff --git a/src/configs/crashoverride.c4m b/src/configs/crashoverride.c4m index 1062b8ea..6c598639 100644 --- a/src/configs/crashoverride.c4m +++ b/src/configs/crashoverride.c4m @@ -223,6 +223,7 @@ This is mostly a copy of insert template however all keys are immutable. ~key.EMBEDDED_TMPDIR.use = true ~key.CLOUD_METADATA_WHEN_CHALKED.use = true ~key.EXTERNAL_TOOL_DURATION.use = true + ~key.SECRET_SCANNER.use = true ~key.SBOM.use = true ~key.SAST.use = true ~key.ERR_INFO.use = true diff --git a/src/configs/sastconfig.c4m b/src/configs/sastconfig.c4m index 365877bc..0cff4202 100644 --- a/src/configs/sastconfig.c4m +++ b/src/configs/sastconfig.c4m @@ -13,7 +13,7 @@ tool semgrep { attempt_install: func install_semgrep(string) -> bool get_command_args: func get_semgrep_args(string) -> string produce_keys: (func load_semgrep_results(string, int) -> - dict[string, string]) + dict[string, `x]) semgrep_config_profile: "auto" semgrep_format: "sarif" semgrep_metrics: "on" diff --git a/src/configs/sbomconfig.c4m b/src/configs/sbomconfig.c4m index 71ba0628..67d7517d 100644 --- a/src/configs/sbomconfig.c4m +++ b/src/configs/sbomconfig.c4m @@ -13,7 +13,7 @@ tool syft { ~attempt_install: func install_syft(string) -> bool ~get_command_args: func get_syft_args(string) -> string ~produce_keys: (func extract_syft_sbom(string, int) -> - dict[string, string]) + dict[string, `x]) syft_exe_dir: "/tmp" syft_installer: "https://raw.githubusercontent.com/anchore/syft/main/install.sh" syft_container: "anchore/syft" diff --git a/src/configs/secretscannerconfig.c4m b/src/configs/secretscannerconfig.c4m new file mode 100644 index 00000000..451e946e --- /dev/null +++ b/src/configs/secretscannerconfig.c4m @@ -0,0 +1,173 @@ +## +## Copyright (c) 2025, Crash Override, Inc. +## +## This file is part of Chalk +## (see https://crashoverride.com/docs/chalk) +## + +## Builtin Secret Scanning tool implementation(s). 
+ +tool trufflehog { + kind: "secret_scanner" + get_tool_location: func find_trufflehog(string) -> string + attempt_install: func install_trufflehog(string) -> bool + get_command_args: func get_trufflehog_args(string) -> string + produce_keys: (func load_trufflehog_results(string, int) -> + dict[string, `x]) + trufflehog_config: "" + trufflehog_format_flags: "--json --no-github-actions" + trufflehog_other_flags: "" + trufflehog_exe_dir: "/tmp" + trufflehog_container: "trufflesecurity/trufflehog" + trufflehog_entrypoint: "trufflehog" + trufflehog_prefer_docker: false + trufflehog_installer: "https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh" + doc: """ +This runs the trufflehog secret scanner. If it doesn't exist in the +path, chalk will: + +1. use docker (if present) to run trufflehog +2. otherwise, attempt to install it via trufflehog install script + +You can configure the following fields in the tool.trufflehog object: + +trufflehog_prefer_docker: When true, docker is preferred over system-installed trufflehog. + Defaults to `false`. +trufflehog_container: The name of the docker container to use to run trufflehog. + Defaults to 'trufflesecurity/trufflehog' from Docker Hub. +trufflehog_entrypoint: The entrypoint to use to run trufflehog. + Defaults to 'trufflehog'. +trufflehog_exe_dir: In addition to $PATH, where to search/install trufflehog. + Defaults to "/tmp". +trufflehog_config: The trufflehog config to use. + By default no config is provided. +trufflehog_format_flags: The output format flags to pass. + Defaults to '--json --no-github-actions'. 
+""" +} + +func trufflehog_docker(path) { + result := "" + if tool.trufflehog.trufflehog_entrypoint == "" or tool.trufflehog.trufflehog_container == "" { + trace("find_trufflehog: docker is disabled - both container and entrypoint must be defined") + return + } + docker_path := docker_exe() + if docker_path == "" { + trace("find_trufflehog: docker is missing; unable to use docker for trufflehog") + return + } + dir := path + if not is_dir(path) { + dir, _ := path_split(path) + } + cwd_volume := "" + if dir != cwd() { + cwd_volume := "-v " + cwd() + ":" + cwd() + " " + } + # Allow using a config from outside of cwd, such as in ~ + config_volume := "" + config := resolve_path(tool.trufflehog.trufflehog_config) + if config != "" and is_file(config) { + config_volume := "-v " + config + ":" + config + " " + } + return ( + docker_path + " run " + + "--rm " + + "--entrypoint=" + tool.trufflehog.trufflehog_entrypoint + " " + + "-w " + dir + " " + + "-v " + dir + ":" + dir + " " + + cwd_volume + + config_volume + + tool.trufflehog.trufflehog_container + ) +} + +func trufflehog_system() { + result := find_exe("trufflehog", [tool.trufflehog.trufflehog_exe_dir]) + if result == "" { + trace("find_trufflehog: Unable to find trufflehog in $PATH") + } else { + trace("find_trufflehog: found trufflehog in $PATH: " + result) + } +} + +func find_trufflehog(path) { + if tool.trufflehog.trufflehog_prefer_docker { + result := trufflehog_docker(path) + if result != "" { + return + } + } + result := trufflehog_system() + if result != "" { + return result + } + result := trufflehog_docker(path) +} + +func install_trufflehog(path) { + info("Attempting to install trufflehog from " + tool.trufflehog.trufflehog_installer) + + contents := url_get(tool.trufflehog.trufflehog_installer) + if not starts_with(contents, "#!") { + error("Trufflehog installer is not a valid shell script due to lack of shebang") + return false + } + + installer := to_tmp_file(contents, ".sh") + cmdline := "sh " + 
installer + " -b " + tool.trufflehog.trufflehog_exe_dir + + trace("Running: " + cmdline) + sout, code := system(cmdline) + + info(sout) + if code == 0 { + trace("Successfully installed trufflehog into: " + tool.trufflehog.trufflehog_exe_dir) + return true + } + else { + error("Unable to install trufflehog into: " + tool.trufflehog.trufflehog_exe_dir) + return false + } +} + +func get_trufflehog_args(path) { + mode := "filesystem" + prefix := "" + if is_dir(path) and is_dir(join_path(path, ".git")) { + mode := "git" + prefix := "file://" + } + result := mode + " " + if tool.trufflehog.trufflehog_config != "" { + result := result + "--config=" + tool.trufflehog.trufflehog_config + " " + } + result := result + tool.trufflehog.trufflehog_format_flags + " " + result := result + tool.trufflehog.trufflehog_other_flags + " " + result := result + prefix + path + " 2>/dev/null" +} + +func load_trufflehog_results(out: string, code) { + result := {} + + if code != 0 { + error("trufflehog failed to run properly; ignoring") + echo(out) + return {} + } + + if strip(out) == "" { + info("trufflehog did not find any findings. ignoring") + return {} + } + + if not starts_with(strip(out), "{") { + error("trufflehog did not run properly - invalid JSON returned; ignoring") + echo(out) + return {} + } + + # trufflehog returns jsonl + return { "SECRET_SCANNER" : parse_jsonl(out) } +} diff --git a/src/confload.nim b/src/confload.nim index 1b7f5e1a..7eb9ccd4 100644 --- a/src/confload.nim +++ b/src/confload.nim @@ -176,6 +176,7 @@ proc loadAllConfigs*() = addConfLoad(attestConfName, toStream(attestConfig), checkNone). addConfLoad(sbomConfName, toStream(sbomConfig), checkNone). addConfLoad(sastConfName, toStream(sastConfig), checkNone). + addConfLoad(secretsConfName, toStream(secretsConfig), checkNone). addConfLoad(techStackConfName, toStream(techStackConfig), checkNone). addConfLoad(linguistConfName, toStream(linguistConfig), checkNone). 
addConfLoad(coConfName, toStream(coConfig), checkNone) diff --git a/src/normalize.nim b/src/normalize.nim index ac5a981f..3bf5e44f 100644 --- a/src/normalize.nim +++ b/src/normalize.nim @@ -34,21 +34,23 @@ proc u64ToStr(i: uint64): string = proc floatToStr(f: float): string = result = newStringOfCap(sizeof(float)+1) - let arr = cast[array[8, char]](f) proc binEncodeItem(self: Box): string + proc binEncodeStr(s: string): string = return "\x01" & u32ToStr(uint32(len(s))) & s + proc binEncodeInt(i: uint64): string = return "\x02" & u64ToStr(i) -proc binEncodeBool(b: bool): string = return if b: "\x03\x01" else: "\x03\x00" + +proc binEncodeBool(b: bool): string = + return if b: "\x03\x01" else: "\x03\x00" proc binEncodeArr(arr: seq[Box]): string = result = "\x04" & u32ToStr(uint32(len(arr))) - for item in arr: result = result & binEncodeItem(item) -proc binEncodeObj(self: ChalkDict, ignore: seq[string] = @[]): string = +proc binEncodeTable(self: ChalkDict, ignore: seq[string] = @[]): string = var encoded = "" count = 0 @@ -68,21 +70,26 @@ proc binEncodeObj(self: ChalkDict, ignore: seq[string] = @[]): string = proc binEncodeFloat(f: float): string = result = "\x06" & floatToStr(f) +proc binEncodeObj(self: Box): string = + if self.o == nil: + return "\x07" + else: + error("non-null objects cannot be normalized") + unreachable + proc binEncodeItem(self: Box): string = case self.kind of MkBool: return binEncodeBool(unpack[bool](self)) of MkInt: return binEncodeInt(unpack[uint64](self)) of MkStr: return binEncodeStr(unpack[string](self)) - of MkTable: return binEncodeObj(unpack[ChalkDict](self)) + of MkTable: return binEncodeTable(unpack[ChalkDict](self)) of MkSeq: return binEncodeArr(unpack[seq[Box]](self)) of MkFloat: return binEncodeFloat(unpack[float](self)) - else: - echo self.kind, " ", $self - unreachable + of MkObj: return binEncodeObj(self) proc normalizeChalk*(dict: ChalkDict): string = # Currently, this is only called for the METADATA_ID field, which only # signs 
things actually being written out. We skip MAGIC, SIGNATURE # and SIGN_PARAMS. let ignoreList = attrGet[seq[string]]("ignore_when_normalizing") - return binEncodeObj(dict, ignoreList) + return binEncodeTable(dict, ignoreList) diff --git a/src/plugins/externalTool.nim b/src/plugins/externalTool.nim index 217a333b..cfff0433 100644 --- a/src/plugins/externalTool.nim +++ b/src/plugins/externalTool.nim @@ -25,7 +25,7 @@ proc clearCallback(self: Plugin) {.cdecl.} = proc ensureRunCallback[T](cb: CallbackObj, args: seq[Box]): T = let value = runCallback(cb, args) if value.isNone(): - raise newException(ValueError, "missing implemenetation of " & $(cb)) + raise newException(ValueError, "missing implementation of " & $(cb)) return unpack[T](value.get()) proc runOneTool(info: PIInfo, path: string): ChalkDict = @@ -82,8 +82,9 @@ proc toolBase(path: string): ChalkDict = var toolInfo = initTable[string, seq[(int, PIInfo)]]() let - runSBOM = attrGet[bool]("run_sbom_tools") - runSAST = attrGet[bool]("run_sast_tools") + runSBOM = attrGet[bool]("run_sbom_tools") + runSAST = attrGet[bool]("run_sast_tools") + runSecrets = attrGet[bool]("run_secret_scanner_tools") # tools should only run during insert operations # note this is a subset of chalkable operations @@ -94,8 +95,9 @@ proc toolBase(path: string): ChalkDict = let v = "tool." 
& k if not attrGet[bool](v & ".enabled"): continue let kind = attrGet[string](v & ".kind") - if not runSBOM and kind == "sbom": continue - if not runSAST and kind == "sast": continue + if not runSBOM and kind == "sbom": continue + if not runSAST and kind == "sast": continue + if not runSecrets and kind == "secret_scanner": continue let tool = (attrGet[int](v & ".priority"), PIInfo(name: k)) if kind notin toolInfo: diff --git a/src/selfextract.nim b/src/selfextract.nim index 116cf37b..a914a45d 100644 --- a/src/selfextract.nim +++ b/src/selfextract.nim @@ -259,6 +259,7 @@ proc testConfigFile(newCon4m: string, addConfLoad(attestConfName, toStream(attestConfig)). addConfLoad(sbomConfName, toStream(sbomConfig)). addConfLoad(sastConfName, toStream(sastConfig)). + addConfLoad(secretsConfName, toStream(secretsConfig)). # TODO for Theo: load the internal config file for rules addConfLoad(linguistConfName, toStream(linguistConfig)). addConfLoad(techStackConfName, toStream(techStackConfig)). diff --git a/tests/functional/conf.py b/tests/functional/conf.py index 999316e2..0ec0903a 100644 --- a/tests/functional/conf.py +++ b/tests/functional/conf.py @@ -67,3 +67,17 @@ UNAME_PATH = shutil.which("uname") SLEEP_PATH = shutil.which("sleep") GDB_PATH = shutil.which("gdb") + + +AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "") +AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "") +AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN", "") + + +def aws_secrets_configured() -> bool: + return all( + [ + bool(AWS_ACCESS_KEY_ID), + bool(AWS_SECRET_ACCESS_KEY), + ] + ) diff --git a/tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m b/tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m new file mode 100644 index 00000000..51737335 --- /dev/null +++ b/tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m @@ -0,0 +1,19 @@ +run_secret_scanner_tools: true + +# `chalk insert` and 
`chalk docker build` output secret to terminal +report_template.terminal_insert.key.SECRET_SCANNER.use: true +report_template.insertion_default.key.SECRET_SCANNER.use: true + +# Embed SECRET_SCANNER reports in chalk marks. + +# `chalk insert` uses the mark_default template. +mark_template.mark_default.key.SECRET_SCANNER.use: true + +# `chalk docker build` uses the `minimal` template. +mark_template.minimal.key.SECRET_SCANNER.use: true + +if env("EXTERNAL_TOOL_USE_DOCKER") != "False" { + tool.trufflehog.trufflehog_prefer_docker = true +} else { + tool.trufflehog.trufflehog_entrypoint = "" +} diff --git a/tests/functional/test_plugins.py b/tests/functional/test_plugins.py index 10b8b8cd..18ec5f4c 100644 --- a/tests/functional/test_plugins.py +++ b/tests/functional/test_plugins.py @@ -10,7 +10,19 @@ import pytest from .chalk.runner import Chalk, ChalkMark -from .conf import CODEOWNERS, CONFIGS, DATA, DOCKERFILES, LS_PATH, PYS, REPO +from .conf import ( + AWS_ACCESS_KEY_ID, + AWS_SECRET_ACCESS_KEY, + AWS_SESSION_TOKEN, + CODEOWNERS, + CONFIGS, + DATA, + DOCKERFILES, + LS_PATH, + PYS, + REPO, + aws_secrets_configured, +) from .utils.dict import ANY, MISSING from .utils.docker import Docker from .utils.git import Git @@ -1011,3 +1023,48 @@ def test_semgrep( # check that sbom has been embedded into the artifact chalk_mark = ChalkMark.from_binary(tmp_data_dir / "hello.sh") assert chalk_mark.contains(mark_sast_data) + + +@pytest.mark.parametrize("use_docker", [True, False]) +@pytest.mark.skipif(not aws_secrets_configured(), reason="AWS secrets not configured") +def test_trufflehog(chalk: Chalk, tmp_data_dir: Path, use_docker: bool): + target = tmp_data_dir / "aws.sh" + target.write_text( + f""" +#!/bin/sh +export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID} +export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY} +export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN} +""".strip() + ) + insert = chalk.insert( + artifact=target, + env={"EXTERNAL_TOOL_USE_DOCKER": str(use_docker)}, + config=( + 
CONFIGS / "composable" / "valid" / "secret_scanner" / "enable_secrets.c4m" + ), + ) + data = { + "trufflehog": [ + { + "SourceName": "trufflehog - filesystem", + "SourceMetadata": { + "Data": { + "Filesystem": { + "file": str(target), + "line": int, + } + } + }, + "DetectorName": re.compile(r"^AWS"), + }, + ], + } + assert insert.report.has( + # scans whole folder + SECRET_SCANNER=data, + ) + assert insert.mark.has( + # scans specific file + SECRET_SCANNER=data, + ) diff --git a/tests/functional/test_sink.py b/tests/functional/test_sink.py index 927c22f7..43a6948c 100644 --- a/tests/functional/test_sink.py +++ b/tests/functional/test_sink.py @@ -12,22 +12,20 @@ import requests from .chalk.runner import Chalk -from .conf import CAT_PATH, SERVER_CERT, SERVER_HTTP, SERVER_HTTPS, SINK_CONFIGS +from .conf import ( + CAT_PATH, + SERVER_CERT, + SERVER_HTTP, + SERVER_HTTPS, + SINK_CONFIGS, + aws_secrets_configured, +) from .utils.log import get_logger logger = get_logger() -def aws_secrets_configured() -> bool: - return all( - [ - bool(os.environ.get("AWS_ACCESS_KEY_ID", "")), - bool(os.environ.get("AWS_SECRET_ACCESS_KEY", "")), - ] - ) - - # validates some basic fields on the chalk output, which should be all the same # since we will only be chalking one target def _validate_chalk(