Skip to content

Commit

Permalink
fix: do not fail to scan paths longer than 256 characters
Browse files Browse the repository at this point in the history
The GitGuardian API does not accept paths longer than 256 characters. Truncate
longer paths, keeping only their last 256 characters.

Fixes a regression introduced in 1.13.3.

Fixes #391
  • Loading branch information
agateau-gg committed Oct 18, 2022
1 parent f5c3e72 commit 6614fcd
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 2 deletions.
8 changes: 7 additions & 1 deletion ggshield/scan/scannable.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@

_RX_HEADER_LINE_SEPARATOR = re.compile("[\n\0]:", re.MULTILINE)

# The GitGuardian API does not accept paths longer than this (in characters)
_API_PATH_MAX_LENGTH = 256


def _parse_patch_header_line(line: str) -> Tuple[str, Filemode]:
"""
Expand Down Expand Up @@ -313,7 +316,10 @@ def _scan_chunk(
Sends a chunk of files to scan to the API
"""
# `documents` is a version of `chunk` suitable for `GGClient.multi_content_scan()`
documents = [{"document": x.document, "filename": x.filename} for x in chunk]
documents = [
{"document": x.document, "filename": x.filename[-_API_PATH_MAX_LENGTH:]}
for x in chunk
]
return executor.submit(
self.client.multi_content_scan,
documents,
Expand Down
19 changes: 18 additions & 1 deletion tests/functional/secret/test_scan_path.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from pathlib import Path

from tests.conftest import GG_VALID_TOKEN
from tests.functional.utils import run_ggshield_scan
from tests.functional.utils import recreate_censored_content, run_ggshield_scan


def test_scan_path(tmp_path: Path) -> None:
Expand All @@ -10,3 +10,20 @@ def test_scan_path(tmp_path: Path) -> None:

result = run_ggshield_scan("path", str(test_file), cwd=tmp_path, expected_code=1)
assert "SECRET=" in result.stdout


def test_scan_path_does_not_fail_on_long_paths(tmp_path: Path) -> None:
    """Scanning a file whose path exceeds 256 characters still reports its secret."""
    # GIVEN a file holding a secret, located at a path longer than 256 characters.
    # A single file name cannot be longer than 255 characters, so the file is
    # nested in a subdirectory: what matters here is the length of the whole path.
    long_dir = tmp_path / ("d" * 255)
    long_dir.mkdir()
    long_file = long_dir / ("f" * 255)
    content = f"SECRET='{GG_VALID_TOKEN}'"
    long_file.write_text(content)

    # WHEN ggshield scans that file
    scan_result = run_ggshield_scan(
        "path", str(long_file), cwd=tmp_path, expected_code=1
    )

    # THEN the secret is found and shows up in the scan output
    expected_output = recreate_censored_content(content, GG_VALID_TOKEN)
    assert expected_output in scan_result.stdout

0 comments on commit 6614fcd

Please sign in to comment.