Skip to content

Commit

Permalink
feat: trufflehog external tool (#489)
Browse files Browse the repository at this point in the history
* feat: trufflehog external tool

* adding secret_scanner config module
  • Loading branch information
miki725 authored Feb 19, 2025
1 parent 4fb2d19 commit 217425e
Show file tree
Hide file tree
Showing 19 changed files with 381 additions and 32 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
- `EXTERNAL_TOOL_DURATION` key which reports external tool duration
for each invocation.
([#488](https://github.com/crashappsec/chalk/pull/488))
- `run_secret_scanner_tools` configuration which, when enabled, collects the new
`SECRET_SCANNER` key. Currently only trufflehog is supported.
([#489](https://github.com/crashappsec/chalk/pull/489))

## 0.5.3

Expand Down
7 changes: 7 additions & 0 deletions configs/co/secret_scanner.c4m
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# User-tunable parameter (default: true) deciding whether secret scanner
# results are collected via trufflehog.
parameter var collect_secret_scanner {
default: true
shortdoc: "Collect secret scanner via trufflehog"
doc: "Whether secret scanner results should be collected for chalking operations via trufflehog"
}

# Feed the parameter into the core `run_secret_scanner_tools` setting.
# NOTE(review): the leading `~` presumably locks the attribute against
# later overrides -- confirm against con4m docs.
~run_secret_scanner_tools = collect_secret_scanner
2 changes: 2 additions & 0 deletions src/chalk_common.nim
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,7 @@ const
baseConfName* = "configs/base_*.c4m"
sbomConfName* = "configs/sbomconfig.c4m"
sastConfName* = "configs/sastconfig.c4m"
secretsConfName* = "configs/secretscannerconfig.c4m"
techStackConfName* = "configs/techstackconfig.c4m"
linguistConfName* = "configs/linguist.c4m"
ioConfName* = "configs/ioconfig.c4m"
Expand All @@ -454,6 +455,7 @@ const
staticRead("configs/buildkitcmd.c4m")
sbomConfig* = staticRead(sbomConfName)
sastConfig* = staticRead(sastConfName)
secretsConfig* = staticRead(secretsConfName)
techStackConfig* = staticRead(techStackConfName)
linguistConfig* = staticRead(linguistConfName)
ioConfig* = staticRead(ioConfName)
Expand Down
22 changes: 20 additions & 2 deletions src/con4mfuncs.nim
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,19 @@ proc c4mParseJson(args: seq[Box], unused = ConfigState(nil)): Option[Box] =
error("Could not parse JSON: " & getCurrentExceptionMsg())
return none(Box)

proc c4mParseJsonL(args: seq[Box], unused = ConfigState(nil)): Option[Box] =
  ## Con4m builtin backing `parse_jsonl()`: parses JSON Lines text (one JSON
  ## document per line) and returns all documents as a single boxed list.
  ##
  ## `args[0]` is unpacked as the JSONL string; `unused` is ignored.
  ## Blank and whitespace-only lines are skipped, so empty input yields an
  ## empty list instead of a parse error.
  ##
  ## Returns `none(Box)`, after logging an error, when any non-blank line
  ## is not valid JSON or the result cannot be converted by `nimJsonToBox`.
  let
    data = unpack[string](args[0])
  try:
    var json = newJArray()
    for line in data.splitLines():
      let entry = line.strip()
      # Tool output frequently contains blank separator/trailing lines;
      # handing an empty string to parseJson() would raise and discard
      # every valid record in the document.
      if entry.len == 0:
        continue
      json.add(parseJson(entry))
    return some(nimJsonToBox(json))
  except:
    # Kept as a catch-all so a con4m script never crashes on bad tool output.
    error("Could not parse JSONL: " & getCurrentExceptionMsg())
    return none(Box)

proc dockerExe(args: seq[Box], unused = ConfigState(nil)): Option[Box] =
  ## Con4m builtin: boxes whatever `getDockerExeLocation()` reports.
  ## Neither `args` nor `unused` is consulted.
  let location = getDockerExeLocation()
  result = some(pack(location))

Expand Down Expand Up @@ -221,8 +234,13 @@ This way the function is only computed once.
("parse_json(string) -> `x",
BuiltInFn(c4mParseJson),
"""
Same as `url_post()`, but takes a certificate file location in the final
parameter, with which HTTPS connections must authenticate against.
Parses JSON string and returns data-struct back.
""",
@["parsing"]),
("parse_jsonl(string) -> `x",
BuiltInFn(c4mParseJsonL),
"""
Parses JSONl string and returns data-struct back.
""",
@["parsing"]),
("docker_exe() -> string",
Expand Down
31 changes: 30 additions & 1 deletion src/configs/base_keyspecs.c4m
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ keyspec SBOM {
since: "0.1.0"
shortdoc: "SBOM(s) collected at Chalk time"
doc: """
This field is meant to captures any SBOMs associated with a chalking
This field is meant to capture any SBOMs associated with a chalking
(i.e., a chalk mark insertion operation). The value, when provided, is
a dictionary. The keys of that dictionary indicate the tool used to
perform the chalking, and the value consists of a free-form JSON
Expand All @@ -1499,6 +1499,35 @@ collecting this information.
"""
}

# Chalk-time artifact key holding secret scanner findings, keyed by tool name.
keyspec SECRET_SCANNER {
  kind: ChalkTimeArtifact
  type: dict[string, dict[string, `x]]
  standard: true
  since: "0.5.4"
  shortdoc: "Secret scanner results collected at Chalk time"
  doc: """
This field is meant to capture any secret scanning tool results while
chalking artifacts. The value, when provided, is a dictionary.
The keys of that dictionary indicate the tool used, and the value consists of
a free-form JSON object returned by the scanning tool.

Currently only these tools are supported:

* trufflehog.
  If the chalking is done in a git repo, trufflehog is used in `git` mode
  which will only scan files checked in the repo.
  Otherwise trufflehog is used in `filesystem` mode where it will scan
  all files within the chalking context directory.
  **NOTE** that trufflehog validates all findings by checking
  secrets it finds. For example for AWS keys, it will check if they
  are still active creds in AWS. This reduces noise however might have
  security considerations depending on the security posture of the org.

This is not enabled by default and `run_secret_scanner_tools` config needs
to be enabled to run secret scanners.
"""
}

keyspec EXTERNAL_TOOL_DURATION {
kind: ChalkTimeArtifact
type: dict[string, dict[string, int]]
Expand Down
6 changes: 6 additions & 0 deletions src/configs/base_report_templates.c4m
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ report and subtract from it.
key.EMBEDDED_TMPDIR.use = true
key.CLOUD_METADATA_WHEN_CHALKED.use = true
key.EXTERNAL_TOOL_DURATION.use = true
key.SECRET_SCANNER.use = true
key.SBOM.use = true
key.SAST.use = true
key.ERR_INFO.use = true
Expand Down Expand Up @@ -505,6 +506,7 @@ doc: """
key.INJECTOR_ENV.use = true
key.TENANT_ID_WHEN_CHALKED.use = true
key.EXTERNAL_TOOL_DURATION.use = true
key.SECRET_SCANNER.use = true
key.SBOM.use = true
key.SAST.use = true
key._ACTION_ID.use = true
Expand Down Expand Up @@ -711,6 +713,7 @@ doc: """
key.EMBEDDED_TMPDIR.use = true
key.CLOUD_METADATA_WHEN_CHALKED.use = true
key.EXTERNAL_TOOL_DURATION.use = true
key.SECRET_SCANNER.use = true
key.SBOM.use = true
key.SAST.use = true
key.ERR_INFO.use = true
Expand Down Expand Up @@ -1015,6 +1018,7 @@ container.
key.INJECTOR_ENV.use = true
key.TENANT_ID_WHEN_CHALKED.use = true
key.EXTERNAL_TOOL_DURATION.use = true
key.SECRET_SCANNER.use = true
key.SBOM.use = true
key.SAST.use = true

Expand Down Expand Up @@ -1198,6 +1202,7 @@ container.
key.EMBEDDED_TMPDIR.use = true
key.CLOUD_METADATA_WHEN_CHALKED.use = true
key.EXTERNAL_TOOL_DURATION.use = true
key.SECRET_SCANNER.use = true
key.SBOM.use = true
key.SAST.use = true
key.ERR_INFO.use = true
Expand Down Expand Up @@ -1684,6 +1689,7 @@ and keep the run-time key.
key.EMBEDDED_TMPDIR.use = true
key.CLOUD_METADATA_WHEN_CHALKED.use = true
key.EXTERNAL_TOOL_DURATION.use = true
key.SECRET_SCANNER.use = true
key.SBOM.use = true
key.SAST.use = true
key.ERR_INFO.use = true
Expand Down
15 changes: 13 additions & 2 deletions src/configs/chalk.c42spec
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ all_cmds_that_insert := ["insert", "build", "push", "load", "setup"]

# Beyond valid chalk commands, these can generate reports.
other_report_ops := ["build", "push", "heartbeat"]
tool_types := ["sbom", "sast"]
tool_types := ["sbom", "sast", "secret_scanner"]
valid_log_levels := ["verbose", "trace", "info", "warn", "error", "none"]
key_types := ["Chalk-time Host", "Chalk-time Artifact",
"Run-time Artifact", "Run-time Host"]
Expand Down Expand Up @@ -3080,7 +3080,18 @@ The syft command line arguments used at invocation (minus the target location) c
doc: """
When true, this will cause chalk to run any configured static analysis security testing (SAST) tools. This is off by default, since it could add a noticeable delay to build time for large code bases.

Currently, the only available tool out of the box is semgrep, and will only work on machines that either already have semgrep installed, or have Python3 installed.
Currently, the only available tool out of the box is semgrep, and will only work on machines that either already have semgrep installed, have Python3 installed, or can run docker.
"""
}

# Boolean opt-in (default: false) for running configured secret scanner tools.
field run_secret_scanner_tools {
type: bool
default: false
shortdoc: "Run any configured secret scanner tools"
doc: """
When true, this will cause chalk to run any configured secret scanner tools. This is off by default.

Currently, the only available tool out of the box is trufflehog.
"""
}

Expand Down
1 change: 1 addition & 0 deletions src/configs/crashoverride.c4m
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ This is mostly a copy of insert template however all keys are immutable.
~key.EMBEDDED_TMPDIR.use = true
~key.CLOUD_METADATA_WHEN_CHALKED.use = true
~key.EXTERNAL_TOOL_DURATION.use = true
~key.SECRET_SCANNER.use = true
~key.SBOM.use = true
~key.SAST.use = true
~key.ERR_INFO.use = true
Expand Down
2 changes: 1 addition & 1 deletion src/configs/sastconfig.c4m
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ tool semgrep {
attempt_install: func install_semgrep(string) -> bool
get_command_args: func get_semgrep_args(string) -> string
produce_keys: (func load_semgrep_results(string, int) ->
dict[string, string])
dict[string, `x])
semgrep_config_profile: "auto"
semgrep_format: "sarif"
semgrep_metrics: "on"
Expand Down
2 changes: 1 addition & 1 deletion src/configs/sbomconfig.c4m
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ tool syft {
~attempt_install: func install_syft(string) -> bool
~get_command_args: func get_syft_args(string) -> string
~produce_keys: (func extract_syft_sbom(string, int) ->
dict[string, string])
dict[string, `x])
syft_exe_dir: "/tmp"
syft_installer: "https://raw.githubusercontent.com/anchore/syft/main/install.sh"
syft_container: "anchore/syft"
Expand Down
173 changes: 173 additions & 0 deletions src/configs/secretscannerconfig.c4m
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
##
## Copyright (c) 2025, Crash Override, Inc.
##
## This file is part of Chalk
## (see https://crashoverride.com/docs/chalk)
##

## Builtin Secret Scanning tool implementation(s).

# Tool definition for trufflehog. The callbacks below locate, install,
# invoke and post-process the scanner; the trufflehog_* fields are the
# user-tunable knobs read by those callbacks.
tool trufflehog {
  kind: "secret_scanner"
  get_tool_location: func find_trufflehog(string) -> string
  attempt_install: func install_trufflehog(string) -> bool
  get_command_args: func get_trufflehog_args(string) -> string
  produce_keys: (func load_trufflehog_results(string, int) ->
                 dict[string, `x])
  trufflehog_config: ""
  trufflehog_format_flags: "--json --no-github-actions"
  trufflehog_other_flags: ""
  trufflehog_exe_dir: "/tmp"
  trufflehog_container: "trufflesecurity/trufflehog"
  trufflehog_entrypoint: "trufflehog"
  trufflehog_prefer_docker: false
  trufflehog_installer: "https://raw.githubusercontent.com/trufflesecurity/trufflehog/main/scripts/install.sh"
  doc: """
This runs the trufflehog secret scanner. If it doesn't exist in the
path, chalk will:

1. use docker (if present) to run trufflehog
2. otherwise, attempt to install it via trufflehog install script

You can configure the following fields in the tool.trufflehog object:

trufflehog_prefer_docker: When true, docker is preferred over system-installed trufflehog.
Defaults to `false`.
trufflehog_container: The name of the docker container to use to run trufflehog.
Defaults to 'trufflesecurity/trufflehog' from Docker Hub.
trufflehog_entrypoint: The entrypoint to use to run trufflehog.
Defaults to 'trufflehog'.
trufflehog_exe_dir: In addition to $PATH, where to search/install trufflehog.
Defaults to "/tmp".
trufflehog_installer: URL of the install script to download and run when trufflehog needs to be installed.
Defaults to the install script from the trufflehog GitHub repository.
trufflehog_config: The trufflehog config to use.
By default no config is provided.
trufflehog_format_flags: The output format flags to pass.
Defaults to '--json --no-github-actions'. The output is parsed as JSON lines, so JSON output must stay enabled.
trufflehog_other_flags: Any additional flags to pass to trufflehog.
By default no additional flags are passed.
"""
}

# Builds the `docker run ...` command prefix that runs trufflehog in a
# container for `path`. Yields "" when the container/entrypoint settings
# are blank or no docker executable is available.
func trufflehog_docker(path) {
  result := ""
  entry_cmd  := tool.trufflehog.trufflehog_entrypoint
  image_name := tool.trufflehog.trufflehog_container
  if entry_cmd == "" or image_name == "" {
    trace("find_trufflehog: docker is disabled - both container and entrypoint must be defined")
    return
  }
  docker_bin := docker_exe()
  if docker_bin == "" {
    trace("find_trufflehog: docker is missing; unable to use docker for trufflehog")
    return
  }
  # Scan from the directory itself, or from the parent directory of a file.
  work_dir := path
  if not is_dir(path) {
    work_dir, _ := path_split(path)
  }
  # The scanned directory is always mounted at the same path inside the
  # container; the current directory is mounted too when it differs.
  current_dir := cwd()
  volume_args := "-v " + work_dir + ":" + work_dir + " "
  if work_dir != current_dir {
    volume_args := volume_args + "-v " + current_dir + ":" + current_dir + " "
  }
  # Allow using a config from outside of cwd, such as in ~
  config := resolve_path(tool.trufflehog.trufflehog_config)
  if config != "" and is_file(config) {
    volume_args := volume_args + "-v " + config + ":" + config + " "
  }
  result := docker_bin + " run " + "--rm "
  result := result + "--entrypoint=" + entry_cmd + " "
  result := result + "-w " + work_dir + " "
  result := result + volume_args
  result := result + image_name
}

# Looks for a trufflehog executable via find_exe(), additionally searching
# the configured trufflehog_exe_dir. Yields "" when nothing is found.
func trufflehog_system() {
  exe_path := find_exe("trufflehog", [tool.trufflehog.trufflehog_exe_dir])
  if exe_path != "" {
    trace("find_trufflehog: found trufflehog in $PATH: " + exe_path)
  } else {
    trace("find_trufflehog: Unable to find trufflehog in $PATH")
  }
  return exe_path
}

# Resolves how trufflehog should be invoked for `path`.
#
# Yields either the path of a system executable or a `docker run ...`
# command prefix; "" means neither is available.
#
# Order of preference:
#   - trufflehog_prefer_docker == true:  docker, then system executable
#   - otherwise:                         system executable, then docker
func find_trufflehog(path) {
  if tool.trufflehog.trufflehog_prefer_docker {
    result := trufflehog_docker(path)
    if result != "" {
      return
    }
    # Docker was already probed and is unusable; retrying it after the
    # system lookup would only repeat the work and the trace output.
    result := trufflehog_system()
    return
  }
  result := trufflehog_system()
  if result == "" {
    result := trufflehog_docker(path)
  }
}

# Downloads the configured installer script and runs it with `sh`, asking
# it to install into trufflehog_exe_dir (`-b`). Yields true only when the
# installer exits with code 0. `path` is not used.
func install_trufflehog(path) {
  installer_url := tool.trufflehog.trufflehog_installer
  target_dir    := tool.trufflehog.trufflehog_exe_dir

  info("Attempting to install trufflehog from " + installer_url)

  script := url_get(installer_url)
  # Minimal sanity check that the download is a script and not an error page.
  if not starts_with(script, "#!") {
    error("Trufflehog installer is not a valid shell script due to lack of shebang")
    return false
  }

  script_file := to_tmp_file(script, ".sh")
  cmdline := "sh " + script_file + " -b " + target_dir

  trace("Running: " + cmdline)
  output, exit_code := system(cmdline)

  info(output)
  if exit_code != 0 {
    error("Unable to install trufflehog into: " + target_dir)
    return false
  }
  trace("Successfully installed trufflehog into: " + target_dir)
  return true
}

# Builds the trufflehog command-line arguments for scanning `path`.
#
# A directory containing `.git` is scanned in `git` mode through a
# `file://` URL; anything else is scanned in `filesystem` mode. The
# resulting string is: mode, optional --config, format flags, other flags,
# the target, and a stderr redirect to /dev/null.
func get_trufflehog_args(path) {
  mode := "filesystem"
  prefix := ""
  if is_dir(path) and is_dir(join_path(path, ".git")) {
    mode := "git"
    prefix := "file://"
  }
  result := mode + " "
  if tool.trufflehog.trufflehog_config != "" {
    # The trailing space is required: without it the config path fuses
    # with the first format flag (e.g. `--config=th.yml--json`).
    result := result + "--config=" + tool.trufflehog.trufflehog_config + " "
  }
  result := result + tool.trufflehog.trufflehog_format_flags + " "
  result := result + tool.trufflehog.trufflehog_other_flags + " "
  result := result + prefix + path + " 2>/dev/null"
}

# Turns trufflehog's stdout (`out`) and exit code (`code`) into chalk keys.
#
# Yields an empty dictionary when the tool failed, printed nothing, or
# printed something that does not start with a JSON object; otherwise
# yields the parsed JSON lines under the "SECRET_SCANNER" key.
func load_trufflehog_results(out: string, code) {
  result := {}

  if code != 0 {
    error("trufflehog failed to run properly; ignoring")
    echo(out)
    return {}
  }

  trimmed := strip(out)

  if trimmed == "" {
    info("trufflehog did not find any findings. ignoring")
    return {}
  }

  # Cheap sanity check before parsing: every JSONL record is an object.
  if not starts_with(trimmed, "{") {
    error("trufflehog did not run properly - invalid JSON returned; ignoring")
    echo(out)
    return {}
  }

  # trufflehog returns jsonl
  return { "SECRET_SCANNER" : parse_jsonl(out) }
}
1 change: 1 addition & 0 deletions src/confload.nim
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ proc loadAllConfigs*() =
addConfLoad(attestConfName, toStream(attestConfig), checkNone).
addConfLoad(sbomConfName, toStream(sbomConfig), checkNone).
addConfLoad(sastConfName, toStream(sastConfig), checkNone).
addConfLoad(secretsConfName, toStream(secretsConfig), checkNone).
addConfLoad(techStackConfName, toStream(techStackConfig), checkNone).
addConfLoad(linguistConfName, toStream(linguistConfig), checkNone).
addConfLoad(coConfName, toStream(coConfig), checkNone)
Expand Down
Loading

0 comments on commit 217425e

Please sign in to comment.