Skip to content

Commit

Permalink
Use GzipFile python unpacker for speed, fall back on pigz if needed (#…
Browse files Browse the repository at this point in the history
…472)

* Use GzipFile python unpacker for speed, fall back on pigz if needed

* Add a changelog entry
  • Loading branch information
ANogin committed Jun 6, 2024
1 parent 0a8d9c3 commit 35aac89
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 30 deletions.
1 change: 1 addition & 0 deletions ofrak_core/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
- View resource attribute string values containing only digits primarily as strings, alternatively as hex numbers. ([#423](https://github.com/redballoonsecurity/ofrak/pull/423))

### Changed
- In `GzipUnpacker`, use the Python `gzip.GzipFile` unpacker for speed, falling back on `pigz` if needed ([#472](https://github.com/redballoonsecurity/ofrak/pull/472))
- Change `FreeSpaceModifier` & `PartialFreeSpaceModifier` behavior: an optional stub that isn't free space can be provided and fill-bytes for free space can be specified. ([#409](https://github.com/redballoonsecurity/ofrak/pull/409))
- `Resource.flush_to_disk` method renamed to `Resource.flush_data_to_disk`. ([#373](https://github.com/redballoonsecurity/ofrak/pull/373))
- `build_image.py` supports building Docker images with OFRAK packages from any ancestor directory. ([#425](https://github.com/redballoonsecurity/ofrak/pull/425))
Expand Down
70 changes: 40 additions & 30 deletions ofrak_core/ofrak/core/gzip.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import asyncio
import logging
import tempfile
from gzip import GzipFile
from gzip import BadGzipFile, GzipFile
from io import BytesIO
from subprocess import CalledProcessError

Expand Down Expand Up @@ -40,35 +40,45 @@ class GzipUnpacker(Unpacker[None]):
external_dependencies = (PIGZ,)

async def unpack(self, resource: Resource, config=None):
    """
    Decompress the resource's gzip data and attach the result as a `GenericBinary` child.

    Decompression is first attempted in-process with `gzip.GzipFile`. If that raises
    `BadGzipFile`, the data is written to a temporary `.gz` file and decompressed with
    the external `pigz` tool instead.

    :param resource: the resource whose data is the gzip stream to unpack
    :param config: not used by this unpacker

    :raises CalledProcessError: if the `pigz` fallback exits with a nonzero return code
    other than 2 or -2

    :return: the newly created child resource holding the decompressed data
    """
    data = await resource.get_data()
    # GzipFile is faster (spawning external processes has overhead),
    # but pigz is more willing to tolerate things like extra junk at the end
    try:
        # Keep only the decompression inside the try, so that an error raised while
        # creating the child can never be mistaken for a bad gzip stream.
        with GzipFile(fileobj=BytesIO(data), mode="r") as gzip_file:
            unpacked_data = gzip_file.read()
    except BadGzipFile:
        # Create temporary file with .gz extension
        with tempfile.NamedTemporaryFile(suffix=".gz") as temp_file:
            temp_file.write(data)
            # Make sure the bytes are on disk before pigz opens the file by name
            temp_file.flush()
            cmd = [
                "pigz",
                "-d",
                "-c",
                temp_file.name,
            ]
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            stdout, stderr = await proc.communicate()
        if proc.returncode:
            if proc.returncode not in (2, -2):
                raise CalledProcessError(returncode=proc.returncode, cmd=cmd, stderr=stderr)
            # Forward any gzip warning message and continue
            LOGGER.warning(stderr)
        unpacked_data = stdout

    # Single exit point: both the GzipFile path and the pigz path return the child
    return await resource.create_child(
        tags=(GenericBinary,),
        data=unpacked_data,
    )


class GzipPacker(Packer[None]):
Expand Down

0 comments on commit 35aac89

Please sign in to comment.