From 10ffa8f6237366616d8013b289c5582e3af291bb Mon Sep 17 00:00:00 2001 From: Sean MacAvaney Date: Wed, 25 Sep 2024 20:23:05 +0100 Subject: [PATCH] documentation wip --- docs/artifact.rst | 48 ++++++++++++++++++++++++++++++++ docs/conf.py | 3 +- docs/extras/generate_includes.py | 32 +++++++++++++++++++++ docs/index.rst | 1 + 4 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 docs/artifact.rst diff --git a/docs/artifact.rst b/docs/artifact.rst new file mode 100644 index 00000000..7737c75c --- /dev/null +++ b/docs/artifact.rst @@ -0,0 +1,48 @@ +Artifact API +------------------------------------------------ + +PyTerrier's Artifact API provides a powerful way to share resources, such as indexes, +cached results, and more. Re-using one another's artifacts is a great way to help achieve +green (i.e., sustainable) research [#]_. + +The API is provided by the :class:`~pyterrier.Artifact` class, which includes methods +for sharing artifacts using a variety of services, such as HuggingFace Hub and Zenodo. + +.. note:: + **What is an Artifact?** "Artifact" often refers to a broad range of items. For + instance, the `ACM defines <https://www.acm.org/publications/policies/artifact-review-and-badging-current>`__ + an artifact as: "a digital object that was either created by the authors to be used as part of the study + or generated by the experiment itself." + + In PyTerrier, we use a narrower definition. We treat artifacts as components that + can be represented as a file or directory stored on disk. These are most frequently built indexes, + but can also be resources such as cached pipeline results. + +Working with Artifacts +================================================= + +TODO + +Artifact Implementations +================================================= + +Here's a list of existing :class:`~pyterrier.Artifact` implementations. (If you've added one, +feel free to make a PR to this page to add it!) + +.. To add to this list, edit extras/generate_includes.py +.. 
include:: ./_includes/artifact_list.rst + +Advanced: Writing Your Own Artifact +================================================= + +TODO: code, entry points + +Advanced: Writing Custom Artifact URL Schemes +================================================= + +TODO + +---- + +.. [#] See: Scells, Zhuang, and Zuccon. `Reduce, Reuse, Recycle: Green Information Retrieval Research + `_. SIGIR 2022. diff --git a/docs/conf.py b/docs/conf.py index b8db8072..ea9ff185 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,10 +24,11 @@ from extras import generate_includes from extras import generate_extensions +generate_includes.setup() if not "QUICK" in os.environ: - generate_includes.setup() generate_includes.dataset_include() generate_includes.experiment_includes() +generate_includes.artifact_list_include() generate_extensions.generate_extensions() # -- Project information ----------------------------------------------------- diff --git a/docs/extras/generate_includes.py b/docs/extras/generate_includes.py index 42dd4ddf..dd82c1af 100644 --- a/docs/extras/generate_includes.py +++ b/docs/extras/generate_includes.py @@ -138,3 +138,35 @@ def experiment_includes(): ).head().to_markdown(tablefmt="rst") with open("_includes/experiment-perq.rst", "wt") as f: f.write(table) + + +def artifact_list_include(): + table = [ + {'class': 'pyterrier.terrier.TerrierIndex', 'package': 'python-terrier', 'package_url': 'https://github.com/terrier-org/pyterrier', 'type': 'sparse_index', 'format': 'terrier'}, + {'class': 'pyterrier_pisa.PisaIndex', 'package': 'pyterrier-pisa', 'package_url': 'https://github.com/terrierteam/pyterrier_pisa', 'type': 'sparse_index', 'format': 'pisa'}, + {'class': 'pyterrier_anserini.AnseriniIndex', 'package': 'pyterrier-anserini', 'package_url': 'https://github.com/seanmacavaney/pyterrier-anserini', 'type': 'sparse_index', 'format': 'anserini'}, + {'class': 'pyterrier_adaptive.corpus_graph.NpTopKCorpusGraph', 'package': 'pyterrier-adaptive', 'package_url': 
'https://github.com/terrierteam/pyterrier-adaptive', 'type': 'corpus_graph', 'format': 'np_topk'}, + {'class': 'pyterrier_ciff.CiffIndex', 'package': 'pyterrier-ciff', 'package_url': 'https://github.com/seanmacavaney/pyterrier-ciff', 'type': 'sparse_index', 'format': 'ciff'}, + {'class': 'pyterrier_dr.FlexIndex', 'package': 'pyterrier-dr', 'package_url': 'https://github.com/terrierteam/pyterrier_dr', 'type': 'dense_index', 'format': 'flex'}, + {'class': 'pyterrier_quality.QualCache', 'package': 'pyterrier-quality', 'package_url': 'https://github.com/terrierteam/pyterrier-quality', 'type': 'quality_score_cache', 'format': 'numpy'}, + {'class': 'pyterrier_caching.Lz4PickleIndexerCache', 'package': 'pyterrier-caching', 'package_url': 'https://github.com/seanmacavaney/pyterrier-caching', 'type': 'indexer_cache', 'format': 'lz4pickle'}, + {'class': 'pyterrier_caching.DbmRetrieverCache', 'package': 'pyterrier-caching', 'package_url': 'https://github.com/seanmacavaney/pyterrier-caching', 'type': 'retriever_cache', 'format': 'dbm.dumb'}, + {'class': 'pyterrier_caching.Hdf5ScorerCache', 'package': 'pyterrier-caching', 'package_url': 'https://github.com/seanmacavaney/pyterrier-caching', 'type': 'scorer_cache', 'format': 'hdf5'}, + ] + with open("_includes/artifact_list.rst", "wt") as f: + f.write(''' +.. list-table:: + :header-rows: 1 + + * - Class + - Package + - Type / Format + - Links +''') + for rec in table: + f.write(''' + * - :class:`~{class}` + - `{package} <{package_url}>`_ + - ``{type}``/``{format}`` + - `HuggingFace `__ +'''.format(**rec)) diff --git a/docs/index.rst b/docs/index.rst index 72a47b8e..3f07b0a1 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -12,6 +12,7 @@ Welcome to PyTerrier's documentation! experiments rewrite ltr + artifact .. toctree:: :maxdepth: 1