Skip to content

Commit

Permalink
#485: addresses #423 IRDS warnings on pt.list_datasets()
Browse files Browse the repository at this point in the history
  • Loading branch information
cmacdonald authored Sep 25, 2024
2 parents fc95d3a + e993e74 commit 24c5b55
Showing 1 changed file with 24 additions and 12 deletions.
36 changes: 24 additions & 12 deletions pyterrier/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1188,18 +1188,30 @@ def list_datasets(en_only=True):
By default, filters to only datasets with both a corpus and topics in English.
"""
import pandas as pd
rows=[]
for k in datasets():
dataset = get_dataset(k)
rows.append([
k,
dataset._describe_component("topics"),
dataset.get_topics_lang(),
dataset._describe_component("qrels"),
dataset._describe_component("corpus"),
dataset.get_corpus_lang(),
dataset._describe_component("index"),
dataset.info_url() ])
import os

# we should suppress any IRDS warning about deprecated datasets
restore_env = os.environ.get("IR_DATASETS_SKIP_DEPRECATED_WARNING", None)
try:
os.environ['IR_DATASETS_SKIP_DEPRECATED_WARNING'] = 'true'
rows=[]
for k in datasets():
dataset = get_dataset(k)
rows.append([
k,
dataset._describe_component("topics"),
dataset.get_topics_lang(),
dataset._describe_component("qrels"),
dataset._describe_component("corpus"),
dataset.get_corpus_lang(),
dataset._describe_component("index"),
dataset.info_url() ])
finally:
if restore_env is None:
del os.environ['IR_DATASETS_SKIP_DEPRECATED_WARNING']
else:
os.environ['IR_DATASETS_SKIP_DEPRECATED_WARNING'] = restore_env

result = pd.DataFrame(rows, columns=["dataset", "topics", "topics_lang", "qrels", "corpus", "corpus_lang", "index", "info_url"])
if en_only:
topics_filter = (result['topics'].isnull()) | (result['topics_lang'] == 'en')
Expand Down

0 comments on commit 24c5b55

Please sign in to comment.