Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pt.terrier.rewrite revisions - remove Axiomatic, remove terrier-prf #472

Merged
merged 3 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,6 @@ These options adjust how the Terrier engine is loaded.

.. autofunction:: pyterrier.terrier.set_version
.. autofunction:: pyterrier.terrier.set_helper_version
.. autofunction:: pyterrier.terrier.set_prf_version
.. autofunction:: pyterrier.terrier.set_property
.. autofunction:: pyterrier.terrier.set_properties
.. autofunction:: pyterrier.terrier.extend_classpath
Expand Down
2 changes: 1 addition & 1 deletion docs/pipeline_examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ pipe = (pt.terrier.Retriever(wikipedia_index, wmodel="BM25") >>

### RM3 Query Expansion

We also provide RM3 query expansion, using an external plugin to Terrier called [terrier-prf](https://github.com/terrierteam/terrier-prf).
We also provide RM3 query expansion.

```python
pipe = (pt.terrier.Retriever(indexref, wmodel="BM25") >>
Expand Down
11 changes: 0 additions & 11 deletions docs/rewrite.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,17 +109,6 @@ RM3
References:
- Nasreen Abdul-Jaleel, James Allan, W Bruce Croft, Fernando Diaz, Leah Larkey, Xiaoyan Li, Mark D Smucker, and Courtney Wade. UMass at TREC 2004: Novelty and HARD. In Proceedings of TREC 2004.


AxiomaticQE
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: pyterrier.rewrite.AxiomaticQE
:members: transform

References:
- Hui Fang and ChengXiang Zhai. Semantic term matching in axiomatic approaches to information retrieval. In Proceedings of the 29th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR 2006), pp. 115–122. ACM, New York (2006).
- Peilin Yang and Jimmy Lin, Reproducing and Generalizing Semantic Term Matching in Axiomatic Information Retrieval. In Proceedings of ECIR 2019.

Combining Query Formulations
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
4 changes: 2 additions & 2 deletions pyterrier/terrier/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# java stuff
from pyterrier.terrier import java
from pyterrier.terrier._text_loader import TerrierTextLoader, terrier_text_loader
from pyterrier.terrier.java import configure, set_version, set_helper_version, set_prf_version, extend_classpath, J, set_property, set_properties, run, version, check_version, check_helper_version
from pyterrier.terrier.java import configure, set_version, set_helper_version, extend_classpath, J, set_property, set_properties, run, version, check_version, check_helper_version
from pyterrier.terrier.retriever import Retriever, FeaturesRetriever, TextScorer
from pyterrier.terrier.index_factory import IndexFactory
from pyterrier.terrier.stemmer import TerrierStemmer
Expand Down Expand Up @@ -42,7 +42,7 @@ def from_dataset(*args, **kwargs):

__all__ = [
# java stuff
'java', 'configure', 'set_version', 'set_helper_version', 'set_prf_version', 'extend_classpath', 'J', 'version', 'check_version', 'check_helper_version',
'java', 'configure', 'set_version', 'set_helper_version', 'extend_classpath', 'J', 'version', 'check_version', 'check_helper_version',

# retrieval
'Retriever', 'BatchRetrieve', 'TerrierRetrieve', 'FeaturesRetriever', 'FeaturesBatchRetrieve', 'TerrierRetrieve', 'TextScorer',
Expand Down
16 changes: 1 addition & 15 deletions pyterrier/terrier/java.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
'helper_version': os.environ.get("TERRIER_HELPER_VERSION") or None,
'boot_packages': [],
'force_download': True,
'prf_version': '-SNAPSHOT',
})

@pt.java.before_init
Expand All @@ -30,11 +29,6 @@ def set_helper_version(version: Optional[str] = None):
configure['helper_version'] = version


@pt.java.before_init
def set_prf_version(version: Optional[str] = None):
    """
    Records the version of the terrier-prf package to resolve when Terrier's Java classpath is built.

    The value is stored under ``configure['prf_version']``.
    NOTE(review): the ``before_init`` decorator suggests this must be called before Java is started -- confirm.

    Args:
        version(str): the terrier-prf version string to store; defaults to None.
    """
    configure['prf_version'] = version


class TerrierJavaInit(pt.java.JavaInitializer):
def priority(self) -> int:
return -10 # between pt.java.core (-100) and default (0) to load earlier than extensions
Expand Down Expand Up @@ -66,9 +60,6 @@ def pre_init(self, jnius_config):
helper_jar = pt.java.mavenresolver.get_package_jar(TERRIER_PKG, 'terrier-python-helper', helper_version)
jnius_config.add_classpath(helper_jar)

prf_jar = pt.java.mavenresolver.get_package_jar('com.github.terrierteam', 'terrier-prf', configure['prf_version'])
jnius_config.add_classpath(prf_jar)

# This is for parallel -- it means that when re-configured in a parallel process, force_download will be False
# and mavenresolver will use the version that was just downloaded above (not try to do it again).
configure['force_download'] = False
Expand Down Expand Up @@ -140,11 +131,7 @@ def message(self):
if "BUILD_DATE" in dir(J.Version):
version_string += f" (build: {J.Version.BUILD_USER} {J.Version.BUILD_DATE})"

res = f"version={version_string}, helper_version={configure['helper_version']}"
if configure['prf_version'] is not None:
res += f" prf_version={configure['prf_version']}"

return res
return f"version={version_string}, helper_version={configure['helper_version']}"

def _post_init_index(self, jnius):
@pt.java.required
Expand Down Expand Up @@ -627,5 +614,4 @@ def check_helper_version(min):
QueryResultSet = 'org.terrier.matching.QueryResultSet',
DependenceModelPreProcess = 'org.terrier.querying.DependenceModelPreProcess',
RM3 = 'org.terrier.querying.RM3',
AxiomaticQE = 'org.terrier.querying.AxiomaticQE',
)
31 changes: 1 addition & 30 deletions pyterrier/terrier/rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ def __init__(self, *args, fb_terms=10, fb_docs=3, fb_lambda=0.6, **kwargs):
fb_docs(int): number of feedback documents to consider. Terrier's default setting is 3 feedback documents.
fb_lambda(float): lambda in RM3, i.e. importance of relevance model viz feedback model. Defaults to 0.6.
"""
assert pt.terrier.check_version("5.10"), "Terrier 5.10 required"
rm = pt.terrier.J.RM3()
self.fb_lambda = fb_lambda
kwargs["qeclass"] = rm
Expand All @@ -413,36 +414,6 @@ def transform(self, queries_and_docs):
self.qe.fbDocs = self.fb_docs
return super().transform(queries_and_docs)

@pt.java.required
class AxiomaticQE(QueryExpansion):
    '''
    Query rewriter that applies axiomatic query expansion.

    A Terrier Retrieve() transformer must follow this transformer in the pipeline.
    The unexpanded query is kept in the `"query_0"` column; use `pt.rewrite.reset()` to restore it.

    Instance Attributes:
     - fb_terms(int): number of feedback terms. Defaults to 10
     - fb_docs(int): number of feedback documents. Defaults to 3
    '''

    def __init__(self, *args, fb_terms=10, fb_docs=3, **kwargs):
        """
        Args:
            index_like: the Terrier index to use
            fb_terms(int): number of terms to add to the query. Terrier's default setting is 10 expansion terms.
            fb_docs(int): number of feedback documents to consider. Terrier's default setting is 3 feedback documents.
        """
        # Instantiate the Java-side expansion object and hand it to the parent as the QE implementation.
        axiomatic = pt.terrier.J.AxiomaticQE()
        self.fb_terms, self.fb_docs = fb_terms, fb_docs
        kwargs["qeclass"] = axiomatic
        super().__init__(*args, **kwargs)

    def transform(self, queries_and_docs):
        # Copy the current Python-side settings onto the expansion object before delegating.
        self.qe.fbTerms = self.fb_terms
        self.qe.fbDocs = self.fb_docs
        expanded = super().transform(queries_and_docs)
        return expanded

def stash_results(clear=True) -> pt.Transformer:
"""
Stashes (saves) the current retrieved documents for each query into the column `"stashed_results_0"`.
Expand Down
1 change: 0 additions & 1 deletion tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ def __init__(self, *args, **kwargs):
if terrier_helper_version is not None:
print("Testing with Terrier Helper version " + terrier_helper_version)

pt.terrier.set_prf_version('rm_tiebreak-SNAPSHOT')
pt.java.set_log_level("DEBUG")
# pt.java.add_option('-ea') can be added here to ensure that all Java assertions are met
pt.java.init()
Expand Down
15 changes: 0 additions & 15 deletions tests/test_rewrite.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,21 +260,6 @@ def test_rm3_expansion_for_query_compact_on_bm25(self):
self.assertEqual(len(actual), 1)
self.assertEqual(normalize_term_weights(expected), normalize_term_weights(actual.iloc[0]["query"]))

def test_axiomatic_qe_expansion_for_query_compact_on_bm25(self):
    # regression guard: the AxiomaticQE rewrite of the query "compact" must remain stable
    vaswani_index = pt.datasets.get_dataset("vaswani").get_index()
    topics = pd.DataFrame([["1", "compact"]], columns=["qid", "query"])

    expander = pt.rewrite.AxiomaticQE(vaswani_index)
    retriever = pt.terrier.Retriever(vaswani_index, wmodel='BM25')

    # retrieve first, then expand the query using the retrieved documents
    retrieved = retriever.transform(topics)
    rewritten = expander.transform(retrieved)

    self.assertEqual(len(rewritten), 1)
    self.assertEqual('applypipeline:off compact^1.000000000', rewritten.iloc[0]["query"])

def test_kl_qe_expansion_for_query_compact_on_bm25(self):
# just ensure that KLQueryExpansion results do not change
expected = 'applypipeline:off compact^1.840895333 design^0.348370740 equip^0.000000000 purpos^0.000000000 instrument^0.000000000 ferrit^0.000000000 anod^0.000000000 aircraft^0.000000000 microwav^0.000000000 sideband^0.000000000'
Expand Down