From 217425e9adeb0fc11268e292562aab26c0128fcd Mon Sep 17 00:00:00 2001
From: Miroslav Shubernetskiy <miroslav@miki725.com>
Date: Wed, 19 Feb 2025 15:37:40 -0500
Subject: [PATCH] feat: trufflehog external tool (#489)

* feat: trufflehog external tool

* adding secret_scanner config module
---
 CHANGELOG.md                                  |   3 +
 configs/co/secret_scanner.c4m                 |   7 +
 src/chalk_common.nim                          |   2 +
 src/con4mfuncs.nim                            |  22 ++-
 src/configs/base_keyspecs.c4m                 |  31 +++-
 src/configs/base_report_templates.c4m         |   6 +
 src/configs/chalk.c42spec                     |  15 +-
 src/configs/crashoverride.c4m                 |   1 +
 src/configs/sastconfig.c4m                    |   2 +-
 src/configs/sbomconfig.c4m                    |   2 +-
 src/configs/secretscannerconfig.c4m           | 173 ++++++++++++++++++
 src/confload.nim                              |   1 +
 src/normalize.nim                             |  25 ++-
 src/plugins/externalTool.nim                  |  12 +-
 src/selfextract.nim                           |   1 +
 tests/functional/conf.py                      |  14 ++
 .../valid/secret_scanner/enable_secrets.c4m   |  19 ++
 tests/functional/test_plugins.py              |  59 +++++-
 tests/functional/test_sink.py                 |  18 +-
 19 files changed, 381 insertions(+), 32 deletions(-)
 create mode 100644 configs/co/secret_scanner.c4m
 create mode 100644 src/configs/secretscannerconfig.c4m
 create mode 100644 tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f5043dab..6d306813 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -26,6 +26,9 @@
 - `EXTERNAL_TOOL_DURATION` key which reports external tool duration
   for each invocation.
   ([#488](https://github.com/crashappsec/chalk/pull/488))
+- `run_secret_scanner_tools` configuration which, when enabled, collects
+  the new `SECRET_SCANNER` key. Currently only trufflehog is supported.
+  ([#489](https://github.com/crashappsec/chalk/pull/489))
 
 ## 0.5.3
 
diff --git a/configs/co/secret_scanner.c4m b/configs/co/secret_scanner.c4m
new file mode 100644
index 00000000..607c9a93
--- /dev/null
+++ b/configs/co/secret_scanner.c4m
@@ -0,0 +1,7 @@
+parameter var collect_secret_scanner {
+  default:  true
+  shortdoc: "Collect secret scanner results via trufflehog"
+  doc:      "Whether secret scanner results should be collected for chalking operations via trufflehog"
+}
+
+~run_secret_scanner_tools = collect_secret_scanner
diff --git a/src/chalk_common.nim b/src/chalk_common.nim
index ea4c8926..9a23f8a4 100644
--- a/src/chalk_common.nim
+++ b/src/chalk_common.nim
@@ -432,6 +432,7 @@ const
   baseConfName*       = "configs/base_*.c4m"
   sbomConfName*       = "configs/sbomconfig.c4m"
   sastConfName*       = "configs/sastconfig.c4m"
+  secretsConfName*    = "configs/secretscannerconfig.c4m"
   techStackConfName*  = "configs/techstackconfig.c4m"
   linguistConfName*  = "configs/linguist.c4m"
   ioConfName*         = "configs/ioconfig.c4m"
@@ -454,6 +455,7 @@ const
                         staticRead("configs/buildkitcmd.c4m")
   sbomConfig*         = staticRead(sbomConfName)
   sastConfig*         = staticRead(sastConfName)
+  secretsConfig*      = staticRead(secretsConfName)
   techStackConfig*    = staticRead(techStackConfName)
   linguistConfig*     = staticRead(linguistConfName)
   ioConfig*           = staticRead(ioConfName)
diff --git a/src/con4mfuncs.nim b/src/con4mfuncs.nim
index 5f5f9dd0..9d7c6d85 100644
--- a/src/con4mfuncs.nim
+++ b/src/con4mfuncs.nim
@@ -123,6 +123,19 @@ proc c4mParseJson(args: seq[Box], unused = ConfigState(nil)): Option[Box] =
     error("Could not parse JSON: " & getCurrentExceptionMsg())
     return none(Box)
 
+proc c4mParseJsonL(args: seq[Box], unused = ConfigState(nil)): Option[Box] =
+  ## Parses JSON Lines text in args[0] into a boxed array, one item per line.
+  let data = unpack[string](args[0])
+  try:
+    var json = newJArray()
+    for line in data.splitLines():
+      # blank lines (and therefore empty input) carry no record; skip them
+      if line.strip() != "": json.add(parseJson(line))
+    return some(nimJsonToBox(json))
+  except:
+    error("Could not parse JSONL: " & getCurrentExceptionMsg())
+    return none(Box)
+
 proc dockerExe(args: seq[Box], unused = ConfigState(nil)): Option[Box] =
   return some(pack(getDockerExeLocation()))
 
@@ -221,8 +234,13 @@ This way the function is only computed once.
     ("parse_json(string) -> `x",
      BuiltInFn(c4mParseJson),
      """
-Same as `url_post()`, but takes a certificate file location in the final
-parameter, with which HTTPS connections must authenticate against.
+Parses a JSON string and returns the resulting data structure.
+""",
+     @["parsing"]),
+    ("parse_jsonl(string) -> `x",
+     BuiltInFn(c4mParseJsonL),
+     """
+Parses a JSON Lines (JSONL) string and returns a list with one parsed item per line.
 """,
      @["parsing"]),
      ("docker_exe() -> string",
diff --git a/src/configs/base_keyspecs.c4m b/src/configs/base_keyspecs.c4m
index 5cd56bcc..ad32b7ef 100644
--- a/src/configs/base_keyspecs.c4m
+++ b/src/configs/base_keyspecs.c4m
@@ -1480,7 +1480,7 @@ keyspec SBOM {
     since:            "0.1.0"
     shortdoc: "SBOM(s) collected at Chalk time"
     doc:              """
-This field is meant to captures any SBOMs associated with a chalking
+This field is meant to capture any SBOMs associated with a chalking
 (i.e., a chalk mark insertion operation). The value, when provided, is
 a dictionary. The keys of that dictionary indicate the tool used to
 perform the chalking, and the value consists of a free-form JSON
@@ -1499,6 +1499,35 @@ collecting this information.
 """
 }
 
+keyspec SECRET_SCANNER {
+    kind:             ChalkTimeArtifact
+    type:             dict[string, dict[string, `x]]
+    standard:         true
+    since:            "0.5.4"
+    shortdoc:         "Secret scanner results collected at Chalk time"
+    doc:              """
+This field is meant to capture any secret scanning tool results while
+chalking artifacts. The value, when provided, is a dictionary.
+The keys of that dictionary indicate the tool used, and the value consists of
+a free-form JSON object returned by the scanning tool.
+
+Currently only these tools are supported:
+
+* trufflehog.
+  If the chalking is done in a git repo, trufflehog is used in `git` mode
+  which will only scan files checked in the repo.
+  Otherwise trufflehog is used in `filesystem` mode where it will scan
+  all files within the chalking context directory.
+  **NOTE** that trufflehog validates all findings by checking
+  secrets it finds. For example for AWS keys, it will check if they
+  are still active creds in AWS. This reduces noise however might have
+  security-considerations depending on the security posture of the org.
+
+This is not enabled by default and `run_secret_scanner_tools` config needs
+to be enabled to run secret scanners.
+"""
+}
+
 keyspec EXTERNAL_TOOL_DURATION {
     kind:             ChalkTimeArtifact
     type:             dict[string, dict[string, int]]
diff --git a/src/configs/base_report_templates.c4m b/src/configs/base_report_templates.c4m
index 16019518..b18ce679 100644
--- a/src/configs/base_report_templates.c4m
+++ b/src/configs/base_report_templates.c4m
@@ -107,6 +107,7 @@ report and subtract from it.
   key.EMBEDDED_TMPDIR.use                     = true
   key.CLOUD_METADATA_WHEN_CHALKED.use         = true
   key.EXTERNAL_TOOL_DURATION.use              = true
+  key.SECRET_SCANNER.use                      = true
   key.SBOM.use                                = true
   key.SAST.use                                = true
   key.ERR_INFO.use                            = true
@@ -505,6 +506,7 @@ doc: """
   key.INJECTOR_ENV.use                        = true
   key.TENANT_ID_WHEN_CHALKED.use              = true
   key.EXTERNAL_TOOL_DURATION.use              = true
+  key.SECRET_SCANNER.use                      = true
   key.SBOM.use                                = true
   key.SAST.use                                = true
   key._ACTION_ID.use                          = true
@@ -711,6 +713,7 @@ doc: """
   key.EMBEDDED_TMPDIR.use                     = true
   key.CLOUD_METADATA_WHEN_CHALKED.use         = true
   key.EXTERNAL_TOOL_DURATION.use              = true
+  key.SECRET_SCANNER.use                      = true
   key.SBOM.use                                = true
   key.SAST.use                                = true
   key.ERR_INFO.use                            = true
@@ -1015,6 +1018,7 @@ container.
   key.INJECTOR_ENV.use                        = true
   key.TENANT_ID_WHEN_CHALKED.use              = true
   key.EXTERNAL_TOOL_DURATION.use              = true
+  key.SECRET_SCANNER.use                      = true
   key.SBOM.use                                = true
   key.SAST.use                                = true
 
@@ -1198,6 +1202,7 @@ container.
   key.EMBEDDED_TMPDIR.use                     = true
   key.CLOUD_METADATA_WHEN_CHALKED.use         = true
   key.EXTERNAL_TOOL_DURATION.use              = true
+  key.SECRET_SCANNER.use                      = true
   key.SBOM.use                                = true
   key.SAST.use                                = true
   key.ERR_INFO.use                            = true
@@ -1684,6 +1689,7 @@ and keep the run-time key.
   key.EMBEDDED_TMPDIR.use                     = true
   key.CLOUD_METADATA_WHEN_CHALKED.use         = true
   key.EXTERNAL_TOOL_DURATION.use              = true
+  key.SECRET_SCANNER.use                      = true
   key.SBOM.use                                = true
   key.SAST.use                                = true
   key.ERR_INFO.use                            = true
diff --git a/src/configs/chalk.c42spec b/src/configs/chalk.c42spec
index 4838447b..155f93f6 100644
--- a/src/configs/chalk.c42spec
+++ b/src/configs/chalk.c42spec
@@ -21,7 +21,7 @@ all_cmds_that_insert := ["insert", "build", "push", "load", "setup"]
 
 # Beyond valid chalk commands, these can generate reports.
 other_report_ops     := ["build", "push", "heartbeat"]
-tool_types           := ["sbom", "sast"]
+tool_types           := ["sbom", "sast", "secret_scanner"]
 valid_log_levels     := ["verbose", "trace", "info", "warn", "error", "none"]
 key_types            := ["Chalk-time Host", "Chalk-time Artifact",
                          "Run-time Artifact", "Run-time Host"]
@@ -3080,7 +3080,18 @@ The syft command line arguments used at invocation (minus the target location) c
     doc: """
 When true, this will cause chalk to run any configured static analysis security testing (SAST) tools.  This is off by default, since it could add a noticeable delay to build time for large code bases.
 
-Currently, the only available tool out of the box is semgrep, and will only work on machines that either already have semgrep installed, or have Python3 installed.
+Currently, the only available tool out of the box is semgrep, and will only work on machines that either already have semgrep installed, have Python3 installed, or can run docker.
+"""
+  }
+
+  field run_secret_scanner_tools {
+    type:     bool
+    default:  false
+    shortdoc: "Run any configured secret scanner tools"
+    doc: """
+When true, this will cause chalk to run any configured secret scanner tools.  This is off by default.
+
+Currently, the only available tool out of the box is trufflehog.
 """
   }
 
diff --git a/src/configs/crashoverride.c4m b/src/configs/crashoverride.c4m
index 1062b8ea..6c598639 100644
--- a/src/configs/crashoverride.c4m
+++ b/src/configs/crashoverride.c4m
@@ -223,6 +223,7 @@ This is mostly a copy of insert template however all keys are immutable.
   ~key.EMBEDDED_TMPDIR.use                     = true
   ~key.CLOUD_METADATA_WHEN_CHALKED.use         = true
   ~key.EXTERNAL_TOOL_DURATION.use              = true
+  ~key.SECRET_SCANNER.use                      = true
   ~key.SBOM.use                                = true
   ~key.SAST.use                                = true
   ~key.ERR_INFO.use                            = true
diff --git a/src/configs/sastconfig.c4m b/src/configs/sastconfig.c4m
index 365877bc..0cff4202 100644
--- a/src/configs/sastconfig.c4m
+++ b/src/configs/sastconfig.c4m
@@ -13,7 +13,7 @@ tool semgrep {
   attempt_install:    func install_semgrep(string) -> bool
   get_command_args:   func get_semgrep_args(string) -> string
   produce_keys:       (func load_semgrep_results(string, int) ->
-                                                   dict[string, string])
+                                                   dict[string, `x])
   semgrep_config_profile: "auto"
   semgrep_format:         "sarif"
   semgrep_metrics:        "on"
diff --git a/src/configs/sbomconfig.c4m b/src/configs/sbomconfig.c4m
index 71ba0628..67d7517d 100644
--- a/src/configs/sbomconfig.c4m
+++ b/src/configs/sbomconfig.c4m
@@ -13,7 +13,7 @@ tool syft {
   ~attempt_install:    func install_syft(string) -> bool
   ~get_command_args:   func get_syft_args(string) -> string
   ~produce_keys:       (func extract_syft_sbom(string, int) ->
-                                                   dict[string, string])
+                                                   dict[string, `x])
   syft_exe_dir:       "/tmp"
   syft_installer:     "https://raw.githubusercontent.com/anchore/syft/main/install.sh"
   syft_container:     "anchore/syft"
diff --git a/src/configs/secretscannerconfig.c4m b/src/configs/secretscannerconfig.c4m
new file mode 100644
index 00000000..451e946e
--- /dev/null
+++ b/src/configs/secretscannerconfig.c4m
@@ -0,0 +1,173 @@
+##
+## Copyright (c) 2025, Crash Override, Inc.
+##
+## This file is part of Chalk
+## (see https://crashoverride.com/docs/chalk)
+##
+
+## Builtin Secret Scanning tool implementation(s).
+
+tool trufflehog {
+  kind: "secret_scanner"
+  get_tool_location:  func find_trufflehog(string) -> string
+  attempt_install:    func install_trufflehog(string) -> bool
+  get_command_args:   func get_trufflehog_args(string) -> string
+  produce_keys:       (func load_trufflehog_results(string, int) ->
+                                                   dict[string, `x])
+  trufflehog_config:         ""
+  trufflehog_format_flags:   "--json --no-github-actions"
+  trufflehog_other_flags:    ""
+  trufflehog_exe_dir:        "/tmp"
+  trufflehog_container:      "trufflesecurity/trufflehog"
+  trufflehog_entrypoint:     "trufflehog"
+  trufflehog_prefer_docker:  false
+  trufflehog_installer:      "https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh"
+  doc: """
+This runs the trufflehog secret scanner.  If it doesn't exist in the
+path, chalk will:
+
+1. use docker (if present) to run trufflehog
+2. otherwise, attempt to install it via trufflehog install script
+
+You can configure the following fields in the tool.trufflehog object:
+
+trufflehog_prefer_docker:  When true, docker is preferred over system-installed trufflehog.
+                           Defaults to `false`.
+trufflehog_container:      The name of the docker container to use to run trufflehog.
+                           Defaults to 'trufflesecurity/trufflehog' from Docker Hub.
+trufflehog_entrypoint:     The entrypoint to use to run trufflehog.
+                           Defaults to 'trufflehog'.
+trufflehog_exe_dir:        In addition to $PATH, where to search/install trufflehog.
+                           Defaults to "/tmp".
+trufflehog_config:         The trufflehog config to use.
+                           By default no config is provided.
+trufflehog_format_flags:   The output format flags to pass.
+                           Defaults to '--json --no-github-actions'.
+"""
+}
+
+func trufflehog_docker(path) {
+  result := ""
+  if tool.trufflehog.trufflehog_entrypoint == "" or tool.trufflehog.trufflehog_container == "" {
+    trace("find_trufflehog: docker is disabled - both container and entrypoint must be defined")
+    return
+  }
+  docker_path := docker_exe()
+  if docker_path == "" {
+    trace("find_trufflehog: docker is missing; unable to use docker for trufflehog")
+    return
+  }
+  dir := path
+  if not is_dir(path) {
+    dir, _ := path_split(path)
+  }
+  cwd_volume := ""
+  if dir != cwd() {
+    cwd_volume := "-v " + cwd() + ":" + cwd() + " "
+  }
+  # Allow using a config from outside of cwd, such as in ~
+  config_volume := ""
+  config := resolve_path(tool.trufflehog.trufflehog_config)
+  if config != "" and is_file(config) {
+    config_volume := "-v " + config + ":" + config + " "
+  }
+  return (
+    docker_path + " run " +
+    "--rm " +
+    "--entrypoint=" + tool.trufflehog.trufflehog_entrypoint + " " +
+    "-w " + dir + " " +
+    "-v " + dir + ":" + dir + " " +
+    cwd_volume +
+    config_volume +
+    tool.trufflehog.trufflehog_container
+  )
+}
+
+func trufflehog_system() {
+  result := find_exe("trufflehog", [tool.trufflehog.trufflehog_exe_dir])
+  if result == "" {
+    trace("find_trufflehog: Unable to find trufflehog in $PATH")
+  } else {
+    trace("find_trufflehog: found trufflehog in $PATH: " + result)
+  }
+}
+
+func find_trufflehog(path) {
+  if tool.trufflehog.trufflehog_prefer_docker {
+    result := trufflehog_docker(path)
+    if result != "" {
+      return
+    }
+  }
+  result := trufflehog_system()
+  if result != "" {
+    return result
+  }
+  result := trufflehog_docker(path)
+}
+
+func install_trufflehog(path) {
+  info("Attempting to install trufflehog from " + tool.trufflehog.trufflehog_installer)
+
+  contents := url_get(tool.trufflehog.trufflehog_installer)
+  if not starts_with(contents, "#!") {
+    error("Trufflehog installer is not a valid shell script due to lack of shebang")
+    return false
+  }
+
+  installer := to_tmp_file(contents, ".sh")
+  cmdline   := "sh " + installer + " -b " + tool.trufflehog.trufflehog_exe_dir
+
+  trace("Running: " + cmdline)
+  sout, code := system(cmdline)
+
+  info(sout)
+  if code == 0 {
+    trace("Successfully installed trufflehog into: " + tool.trufflehog.trufflehog_exe_dir)
+    return true
+  }
+  else {
+    error("Unable to install trufflehog into: " + tool.trufflehog.trufflehog_exe_dir)
+    return false
+  }
+}
+
+func get_trufflehog_args(path) {
+  mode   := "filesystem"
+  prefix := ""
+  if is_dir(path) and is_dir(join_path(path, ".git")) {
+    mode   := "git"
+    prefix := "file://"
+  }
+  result := mode + " "
+  if tool.trufflehog.trufflehog_config != "" {
+    # trailing space keeps the config path from fusing with the next flag
+    result := result + "--config=" + tool.trufflehog.trufflehog_config + " "
+  }
+  result := result + tool.trufflehog.trufflehog_format_flags + " " + tool.trufflehog.trufflehog_other_flags + " "
+  result := result + prefix + path + " 2>/dev/null"
+}
+
+func load_trufflehog_results(out: string, code) {
+  result := {}
+
+  if code != 0 {
+    error("trufflehog failed to run properly; ignoring")
+    echo(out)
+    return {}
+  }
+
+  if strip(out) == "" {
+    info("trufflehog did not find any findings. ignoring")
+    return {}
+  }
+
+  if not starts_with(strip(out), "{") {
+    error("trufflehog did not run properly - invalid JSON returned; ignoring")
+    echo(out)
+    return {}
+  }
+
+  # trufflehog returns jsonl
+  return { "SECRET_SCANNER" : parse_jsonl(out) }
+}
diff --git a/src/confload.nim b/src/confload.nim
index 1b7f5e1a..7eb9ccd4 100644
--- a/src/confload.nim
+++ b/src/confload.nim
@@ -176,6 +176,7 @@ proc loadAllConfigs*() =
     addConfLoad(attestConfName,    toStream(attestConfig),    checkNone).
     addConfLoad(sbomConfName,      toStream(sbomConfig),      checkNone).
     addConfLoad(sastConfName,      toStream(sastConfig),      checkNone).
+    addConfLoad(secretsConfName,   toStream(secretsConfig),   checkNone).
     addConfLoad(techStackConfName, toStream(techStackConfig), checkNone).
     addConfLoad(linguistConfName,  toStream(linguistConfig),  checkNone).
     addConfLoad(coConfName,        toStream(coConfig),        checkNone)
diff --git a/src/normalize.nim b/src/normalize.nim
index ac5a981f..3bf5e44f 100644
--- a/src/normalize.nim
+++ b/src/normalize.nim
@@ -34,21 +34,23 @@ proc u64ToStr(i: uint64): string =
 
 proc floatToStr(f: float): string =
   result = newStringOfCap(sizeof(float)+1)
-  let arr = cast[array[8, char]](f)
 
 proc binEncodeItem(self: Box): string
+
 proc binEncodeStr(s: string): string =
   return "\x01" & u32ToStr(uint32(len(s))) & s
+
 proc binEncodeInt(i: uint64): string =
   return "\x02" & u64ToStr(i)
-proc binEncodeBool(b: bool): string  = return if b: "\x03\x01" else: "\x03\x00"
+
+proc binEncodeBool(b: bool): string  =
+  return if b: "\x03\x01" else: "\x03\x00"
 
 proc binEncodeArr(arr: seq[Box]): string =
   result = "\x04" & u32ToStr(uint32(len(arr)))
-
   for item in arr: result = result & binEncodeItem(item)
 
-proc binEncodeObj(self: ChalkDict, ignore: seq[string] = @[]): string =
+proc binEncodeTable(self: ChalkDict, ignore: seq[string] = @[]): string =
   var
     encoded = ""
     count   = 0
@@ -68,21 +70,26 @@ proc binEncodeObj(self: ChalkDict, ignore: seq[string] = @[]): string =
 proc binEncodeFloat(f: float): string =
   result = "\x06" & floatToStr(f)
 
+proc binEncodeObj(self: Box): string =
+  if self.o == nil:
+    return "\x07"
+  else:
+    error("non-null objects cannot be normalized")
+    unreachable
+
 proc binEncodeItem(self: Box): string =
   case self.kind
   of MkBool:  return binEncodeBool(unpack[bool](self))
   of MkInt:   return binEncodeInt(unpack[uint64](self))
   of MkStr:   return binEncodeStr(unpack[string](self))
-  of MkTable: return binEncodeObj(unpack[ChalkDict](self))
+  of MkTable: return binEncodeTable(unpack[ChalkDict](self))
   of MkSeq:   return binEncodeArr(unpack[seq[Box]](self))
   of MkFloat: return binEncodeFloat(unpack[float](self))
-  else:
-    echo self.kind, " ", $self
-    unreachable
+  of MkObj:   return binEncodeObj(self)
 
 proc normalizeChalk*(dict: ChalkDict): string =
   # Currently, this is only called for the METADATA_ID field, which only
   # signs things actually being written out.  We skip MAGIC, SIGNATURE
   # and SIGN_PARAMS.
   let ignoreList = attrGet[seq[string]]("ignore_when_normalizing")
-  return binEncodeObj(dict, ignoreList)
+  return binEncodeTable(dict, ignoreList)
diff --git a/src/plugins/externalTool.nim b/src/plugins/externalTool.nim
index 217a333b..cfff0433 100644
--- a/src/plugins/externalTool.nim
+++ b/src/plugins/externalTool.nim
@@ -25,7 +25,7 @@ proc clearCallback(self: Plugin) {.cdecl.} =
 proc ensureRunCallback[T](cb: CallbackObj, args: seq[Box]): T =
   let value = runCallback(cb, args)
   if value.isNone():
-    raise newException(ValueError, "missing implemenetation of " & $(cb))
+    raise newException(ValueError, "missing implementation of " & $(cb))
   return unpack[T](value.get())
 
 proc runOneTool(info: PIInfo, path: string): ChalkDict =
@@ -82,8 +82,9 @@ proc toolBase(path: string): ChalkDict =
   var
     toolInfo = initTable[string, seq[(int, PIInfo)]]()
   let
-    runSBOM  = attrGet[bool]("run_sbom_tools")
-    runSAST  = attrGet[bool]("run_sast_tools")
+    runSBOM    = attrGet[bool]("run_sbom_tools")
+    runSAST    = attrGet[bool]("run_sast_tools")
+    runSecrets = attrGet[bool]("run_secret_scanner_tools")
 
   # tools should only run during insert operations
   # note this is a subset of chalkable operations
@@ -94,8 +95,9 @@ proc toolBase(path: string): ChalkDict =
     let v = "tool." & k
     if not attrGet[bool](v & ".enabled"): continue
     let kind = attrGet[string](v & ".kind")
-    if not runSBOM and kind == "sbom": continue
-    if not runSAST and kind == "sast": continue
+    if not runSBOM    and kind == "sbom":           continue
+    if not runSAST    and kind == "sast":           continue
+    if not runSecrets and kind == "secret_scanner": continue
 
     let tool = (attrGet[int](v & ".priority"), PIInfo(name: k))
     if kind notin toolInfo:
diff --git a/src/selfextract.nim b/src/selfextract.nim
index 116cf37b..a914a45d 100644
--- a/src/selfextract.nim
+++ b/src/selfextract.nim
@@ -259,6 +259,7 @@ proc testConfigFile(newCon4m: string,
                addConfLoad(attestConfName,    toStream(attestConfig)).
                addConfLoad(sbomConfName,      toStream(sbomConfig)).
                addConfLoad(sastConfName,      toStream(sastConfig)).
+               addConfLoad(secretsConfName,   toStream(secretsConfig)).
                # TODO for Theo: load the internal config file for rules
                addConfLoad(linguistConfName,  toStream(linguistConfig)).
                addConfLoad(techStackConfName, toStream(techStackConfig)).
diff --git a/tests/functional/conf.py b/tests/functional/conf.py
index 999316e2..0ec0903a 100644
--- a/tests/functional/conf.py
+++ b/tests/functional/conf.py
@@ -67,3 +67,17 @@
 UNAME_PATH = shutil.which("uname")
 SLEEP_PATH = shutil.which("sleep")
 GDB_PATH = shutil.which("gdb")
+
+
+AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID", "")
+AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY", "")
+AWS_SESSION_TOKEN = os.environ.get("AWS_SESSION_TOKEN", "")
+
+
+def aws_secrets_configured() -> bool:
+    """Report whether AWS credentials are available to the test run.
+
+    Only the access key id and the secret access key are checked;
+    the session token is not required.
+    """
+    return bool(AWS_ACCESS_KEY_ID) and bool(AWS_SECRET_ACCESS_KEY)
diff --git a/tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m b/tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m
new file mode 100644
index 00000000..51737335
--- /dev/null
+++ b/tests/functional/data/configs/composable/valid/secret_scanner/enable_secrets.c4m
@@ -0,0 +1,19 @@
+run_secret_scanner_tools: true
+
+# `chalk insert` and `chalk docker build` output secret to terminal
+report_template.terminal_insert.key.SECRET_SCANNER.use: true
+report_template.insertion_default.key.SECRET_SCANNER.use: true
+
+# Embed SECRET_SCANNER reports in chalk marks.
+
+# `chalk insert` uses the mark_default template.
+mark_template.mark_default.key.SECRET_SCANNER.use: true
+
+# `chalk docker build` uses the `minimal` template.
+mark_template.minimal.key.SECRET_SCANNER.use: true
+
+if env("EXTERNAL_TOOL_USE_DOCKER") != "False" {
+  tool.trufflehog.trufflehog_prefer_docker = true
+} else {
+  tool.trufflehog.trufflehog_entrypoint    = ""
+}
diff --git a/tests/functional/test_plugins.py b/tests/functional/test_plugins.py
index 10b8b8cd..18ec5f4c 100644
--- a/tests/functional/test_plugins.py
+++ b/tests/functional/test_plugins.py
@@ -10,7 +10,19 @@
 import pytest
 
 from .chalk.runner import Chalk, ChalkMark
-from .conf import CODEOWNERS, CONFIGS, DATA, DOCKERFILES, LS_PATH, PYS, REPO
+from .conf import (
+    AWS_ACCESS_KEY_ID,
+    AWS_SECRET_ACCESS_KEY,
+    AWS_SESSION_TOKEN,
+    CODEOWNERS,
+    CONFIGS,
+    DATA,
+    DOCKERFILES,
+    LS_PATH,
+    PYS,
+    REPO,
+    aws_secrets_configured,
+)
 from .utils.dict import ANY, MISSING
 from .utils.docker import Docker
 from .utils.git import Git
@@ -1011,3 +1023,48 @@ def test_semgrep(
     # check that sbom has been embedded into the artifact
     chalk_mark = ChalkMark.from_binary(tmp_data_dir / "hello.sh")
     assert chalk_mark.contains(mark_sast_data)
+
+
+@pytest.mark.parametrize("use_docker", [True, False])
+@pytest.mark.skipif(not aws_secrets_configured(), reason="AWS secrets not configured")
+def test_trufflehog(chalk: Chalk, tmp_data_dir: Path, use_docker: bool):
+    target = tmp_data_dir / "aws.sh"
+    target.write_text(
+        f"""
+#!/bin/sh
+export AWS_ACCESS_KEY_ID={AWS_ACCESS_KEY_ID}
+export AWS_SECRET_ACCESS_KEY={AWS_SECRET_ACCESS_KEY}
+export AWS_SESSION_TOKEN={AWS_SESSION_TOKEN}
+""".strip()
+    )
+    insert = chalk.insert(
+        artifact=target,
+        env={"EXTERNAL_TOOL_USE_DOCKER": str(use_docker)},
+        config=(
+            CONFIGS / "composable" / "valid" / "secret_scanner" / "enable_secrets.c4m"
+        ),
+    )
+    data = {
+        "trufflehog": [
+            {
+                "SourceName": "trufflehog - filesystem",
+                "SourceMetadata": {
+                    "Data": {
+                        "Filesystem": {
+                            "file": str(target),
+                            "line": int,
+                        }
+                    }
+                },
+                "DetectorName": re.compile(r"^AWS"),
+            },
+        ],
+    }
+    assert insert.report.has(
+        # scans whole folder
+        SECRET_SCANNER=data,
+    )
+    assert insert.mark.has(
+        # scans specific file
+        SECRET_SCANNER=data,
+    )
diff --git a/tests/functional/test_sink.py b/tests/functional/test_sink.py
index 927c22f7..43a6948c 100644
--- a/tests/functional/test_sink.py
+++ b/tests/functional/test_sink.py
@@ -12,22 +12,20 @@
 import requests
 
 from .chalk.runner import Chalk
-from .conf import CAT_PATH, SERVER_CERT, SERVER_HTTP, SERVER_HTTPS, SINK_CONFIGS
+from .conf import (
+    CAT_PATH,
+    SERVER_CERT,
+    SERVER_HTTP,
+    SERVER_HTTPS,
+    SINK_CONFIGS,
+    aws_secrets_configured,
+)
 from .utils.log import get_logger
 
 
 logger = get_logger()
 
 
-def aws_secrets_configured() -> bool:
-    return all(
-        [
-            bool(os.environ.get("AWS_ACCESS_KEY_ID", "")),
-            bool(os.environ.get("AWS_SECRET_ACCESS_KEY", "")),
-        ]
-    )
-
-
 # validates some basic fields on the chalk output, which should be all the same
 # since we will only be chalking one target
 def _validate_chalk(