Skip to content

Commit

Permalink
Update GitParse to handle quoted binary filenames (#2391)
Browse files Browse the repository at this point in the history
* fix(gitparse): quoted binary files

* fix(gitparse): use bytes.Cut instead of regexp

* fix lint warning

---------

Co-authored-by: Zachary Rice <zachary.rice@trufflesec.com>
  • Loading branch information
rgmz and zricethezav authored Feb 8, 2024
1 parent a00ffe9 commit 3b40c4f
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 14 deletions.
41 changes: 30 additions & 11 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -458,10 +458,17 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
case isBinaryLine(latestState, line):
latestState = BinaryFileLine

currentDiff.PathB = pathFromBinaryLine(line)
path, ok := pathFromBinaryLine(line)
if !ok {
err = fmt.Errorf(`expected line to match 'Binary files a/fileA and b/fileB differ', got "%s"`, line)
ctx.Logger().Error(err, "Failed to parse binary file line")
latestState = ParseFailure
continue
}

// Don't do anything if the file is deleted. (pathA has file path, pathB is /dev/null)
if currentDiff.PathB != "" {
if path != "" {
currentDiff.PathB = path
currentDiff.IsBinary = true
}
case isFromFileLine(latestState, line):
Expand Down Expand Up @@ -708,15 +715,27 @@ func isBinaryLine(latestState ParseState, line []byte) bool {
}

// Get the b/ file path. Ignoring the edge case of files having ` and b/` in the name for simplicity.
func pathFromBinaryLine(line []byte) string {
logger := context.Background().Logger()
sbytes := bytes.Split(line, []byte(" and b/"))
if len(sbytes) != 2 {
logger.V(2).Info("Expected binary line to be in 'Binary files a/fileA and b/fileB differ' format.", "got", line)
return ""
}
bRaw := sbytes[1]
return string(bRaw[:len(bRaw)-8]) // drop the "b/" and " differ\n"
// pathFromBinaryLine extracts the "b/" side path from a git binary-diff line
// of the form "Binary files a/fileA and b/fileB differ\n".
//
// It returns:
//   - ("", true) when the line mentions "and /dev/null" (the file was deleted,
//     so there is no b/ path);
//   - (path, true) when a b/ path was found, either bare (` and b/path`) or
//     quoted (` and "b/path"`); a quoted path is returned with the surrounding
//     quotes removed but with its escape sequences left untouched;
//   - ("", false) when the line matches none of the known formats, including
//     lines that do not end in " differ". The trailing newline is optional.
//
// Paths that themselves contain " and b/" are ambiguous and are not handled:
// the first occurrence of the separator wins.
func pathFromBinaryLine(line []byte) (string, bool) {
	if bytes.Contains(line, []byte("and /dev/null")) {
		return "", true
	}

	// Validate and strip the trailing " differ" (plus optional newline) up
	// front instead of slicing off a fixed number of bytes, which would panic
	// on a truncated line and corrupt the path on a line without a newline.
	differ := []byte(" differ")
	body := bytes.TrimSuffix(line, []byte("\n"))
	if !bytes.HasSuffix(body, differ) {
		return "", false
	}
	body = body[:len(body)-len(differ)]

	if _, after, ok := bytes.Cut(body, []byte(" and b/")); ok {
		return string(after), true
	}

	// Edge case where the path is quoted.
	// https://github.com/trufflesecurity/trufflehog/issues/2384
	if _, after, ok := bytes.Cut(body, []byte(` and "b/`)); ok {
		// A quoted path must also carry its closing quote.
		if !bytes.HasSuffix(after, []byte(`"`)) {
			return "", false
		}
		return string(after[:len(after)-1]), true
	}

	// Unknown format.
	return "", false
}

// --- a/internal/addrs/move_endpoint_module.go
Expand Down
11 changes: 8 additions & 3 deletions pkg/gitparse/gitparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -591,12 +591,17 @@ func TestLineChecksNoStaged(t *testing.T) {

func TestBinaryPathParse(t *testing.T) {
cases := map[string]string{
"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
"Binary files a/trufflehog_3.42.0_linux_arm64.tar.gz and /dev/null differ\n": "",
"Binary files /dev/null and b/plugin.sig differ\n": "plugin.sig",
"Binary files /dev/null and b/ Lunch and Learn - HCDiag.pdf differ\n": " Lunch and Learn - HCDiag.pdf",
"Binary files /dev/null and \"b/assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png\" differ\n": "assets/retailers/ON-ikony-Platforma-ecom \\342\\200\\224 kopia.png",
}

for name, expected := range cases {
filename := pathFromBinaryLine([]byte(name))
filename, ok := pathFromBinaryLine([]byte(name))
if !ok {
t.Errorf("Failed to get path: %s", name)
}
if filename != expected {
t.Errorf("Expected: %s, Got: %s", expected, filename)
}
Expand Down

0 comments on commit 3b40c4f

Please sign in to comment.