Skip to content

Commit

Permalink
Lz4 support for pt.io.autoopen() (#323, addresses #317)
Browse files Browse the repository at this point in the history
* added lz4, addresses #317
* added test cases
  • Loading branch information
cmacdonald authored Aug 26, 2022
1 parent 8af7a06 commit c706cbf
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 1 deletion.
5 changes: 4 additions & 1 deletion pyterrier/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def coerce_dataframe(obj):

def autoopen(filename, mode='rb'):
"""
A drop-in for open() that applies automatic compression for .gz and .bz2 file extensions
A drop-in for open() that applies automatic compression for .gz, .bz2 and .lz4 file extensions
"""

if filename.endswith(".gz"):
Expand All @@ -26,6 +26,9 @@ def autoopen(filename, mode='rb'):
elif filename.endswith(".bz2"):
import bz2
return bz2.open(filename, mode)
elif filename.endswith(".lz4"):
import lz4.frame
return lz4.frame.open(filename, mode)
return open(filename, mode)

def find_files(dir):
Expand Down
1 change: 1 addition & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ xgboost
ray
fastrank>=0.7.0
torch
lz4
Binary file added tests/fixtures/testfile.txt.lz4
Binary file not shown.
9 changes: 9 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@

class TestUtils(TempDirTestCase):

def test_compression(self):
    """Exercise pt.io.autoopen() on compressed files.

    First reads the bundled ``.lz4`` fixture in text mode and checks its
    first line, then writes a ``.gz`` file under ``self.test_dir`` and
    reads it back to confirm the round trip.
    """
    # Read side: the fixture lives next to the tests, under self.here.
    lz4_fixture = os.path.join(self.here, 'fixtures/testfile.txt.lz4')
    with pt.io.autoopen(lz4_fixture, 'rt') as reader:
        first_line = reader.readlines()[0]
    self.assertEqual('hello world', first_line)

    # Round trip: the writer must be closed before the file is reopened
    # for reading, hence two separate with-blocks on the same path.
    gz_target = os.path.join(self.test_dir, "bla.txt.gz")
    with pt.io.autoopen(gz_target, 'wt') as writer:
        writer.write('bla')
    with pt.io.autoopen(gz_target, 'rt') as reader:
        first_line = reader.readlines()[0]
    self.assertEqual('bla', first_line)

def test_save_trec(self):
res = pd.DataFrame([["1", "d1", 5.3, 1]], columns=['qid', 'docno', 'score', 'rank'])
Expand Down

0 comments on commit c706cbf

Please sign in to comment.