diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index 86f0a37f..3766e449 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -6,8 +6,7 @@ name: Continuous Testing on: push: branches: [ master ] - pull_request: - branches: [ master ] + pull_request: {} jobs: build: @@ -79,6 +78,13 @@ jobs: # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide #flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + - name: RM3 unit tests + env: + TERRIER_VERSION: ${{ matrix.terrier }} + run: | + pytest -p no:faulthandler tests/test_rewrite_rm3.py + # Hide underlying Jnius problem by disabling faulthandler: https://github.com/pytest-dev/pytest/issues/7634 + - name: Flash unit tests env: TERRIER_VERSION: ${{ matrix.terrier }} diff --git a/docs/index.rst b/docs/index.rst index af7ed735..e2b3bb64 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,6 +46,7 @@ Welcome to PyTerrier's documentation! PyTerrier_T5 PyTerrier_GenRank PyTerrier_ColBERT + PyTerrier_ChatNoir PyTerrier_ANCE PyTerrier_doc2query PyTerrier_DeepCT diff --git a/pyterrier/java/_utils.py b/pyterrier/java/_utils.py index 338be36e..ee1107f9 100644 --- a/pyterrier/java/_utils.py +++ b/pyterrier/java/_utils.py @@ -209,8 +209,11 @@ def legacy_init(version=None, mem=None, packages=[], jvm_opts=[], redirect_io=Tr deprecated_calls.append(f'pt.java.set_log_level({logging!r})') for package in boot_packages: - pt.java.add_package(*package.split(':')) # format: org:package:version:filetype (where version and filetype are optional) - deprecated_calls.append(f'pt.java.add_package({package!r})') + # format: org:package:version:filetype (where version and filetype are optional) + pkg_split = package.split(':') + pkg_string = ", ".join(f'{w!r}' for w in pkg_split) + pt.java.add_package(*pkg_split) + deprecated_calls.append(f'pt.java.add_package({pkg_string})') for opt in jvm_opts: pt.java.add_option(opt) diff --git a/pyterrier/terrier/java.py b/pyterrier/terrier/java.py index 422121d3..13821af8 100644 --- a/pyterrier/terrier/java.py +++ b/pyterrier/terrier/java.py @@ -23,17 +23,20 @@ @pt.java.before_init def set_version(version: Optional[str] = None): - configure['terrier_version'] = version + if version is not None: + configure['terrier_version'] = version @pt.java.before_init def set_helper_version(version: Optional[str] = None): - configure['helper_version'] = version + if version is not None: + configure['helper_version'] = version @pt.java.before_init def set_prf_version(version: Optional[str] = None): - configure['prf_version'] = version + if version is not None: + configure['prf_version'] = version class TerrierJavaInit(pt.java.JavaInitializer): diff --git a/pyterrier/terrier/rewrite.py b/pyterrier/terrier/rewrite.py index 6152d5d8..ca33f3d3 100644 --- a/pyterrier/terrier/rewrite.py +++ b/pyterrier/terrier/rewrite.py @@ -170,7 +170,7 @@ class QueryExpansion(pt.Transformer): ''' - def __init__(self, index_like, fb_terms=10, fb_docs=3, qeclass="org.terrier.querying.QueryExpansion", verbose=0, properties={}, **kwargs): + def __init__(self, index_like, fb_terms=10, fb_docs=3, qeclass="org.terrier.querying.QueryExpansion", verbose=0, properties={}, requires_scores=False, **kwargs): super().__init__(**kwargs) self.verbose = verbose if isinstance(qeclass, str): @@ -185,6 +185,7 @@ def __init__(self, index_like, fb_terms=10, fb_docs=3, qeclass="org.terrier.quer self.fb_terms = fb_terms self.fb_docs = fb_docs self.manager = pt.terrier.J.ManagerFactory._from_(self.indexref) + self.requires_scores = requires_scores def __reduce__(self): return ( @@ -216,30 +217,38 @@ def __setstate__(self, d): def _populate_resultset(self, topics_and_res, qid, index): - docids=None - scores=None - occurrences=None + docids = None + scores = None + occurrences = None if "docid" in topics_and_res.columns: # we need .tolist() as jnius cannot convert numpy arrays - docids = topics_and_res[topics_and_res["qid"] == qid]["docid"].values.tolist() + topics_and_res_for_qid = topics_and_res[topics_and_res["qid"] == qid] + docids = topics_and_res_for_qid["docid"].values.tolist() scores = [0.0] * len(docids) + if self.requires_scores: + scores = topics_and_res_for_qid["score"].values.tolist() occurrences = [0] * len(docids) elif "docno" in topics_and_res.columns: - docnos = topics_and_res[topics_and_res["qid"] == qid]["docno"].values + topics_and_res_for_qid = topics_and_res[topics_and_res["qid"] == qid] + docnos = topics_and_res_for_qid["docno"].values docids = [] scores = [] + _scores = [0.0] * len(docids) + if self.requires_scores: + _scores = topics_and_res_for_qid["score"].values.tolist() + occurrences = [] metaindex = index.getMetaIndex() skipped = 0 - for docno in docnos: + for docno, docscore in zip(docnos, _scores): docid = metaindex.getDocument("docno", docno) if docid == -1: skipped +=1 assert docid != -1, "could not match docno" + docno + " to a docid for query " + qid docids.append(docid) - scores.append(0.0) occurrences.append(0) + scores.append(docscore) if skipped > 0: if skipped == len(docnos): warn("*ALL* %d feedback docnos for qid %s could not be found in the index" % (skipped, qid)) @@ -384,7 +393,7 @@ def __init__(self, *args, fb_terms=10, fb_docs=3, fb_lambda=0.6, **kwargs): rm = pt.terrier.J.RM3() self.fb_lambda = fb_lambda kwargs["qeclass"] = rm - super().__init__(*args, fb_terms=fb_terms, fb_docs=fb_docs, **kwargs) + super().__init__(*args, fb_terms=fb_terms, fb_docs=fb_docs, requires_scores=True, **kwargs) def __getstate__(self): rtr = super().__getstate__() diff --git a/tests/test_rewrite.py b/tests/test_rewrite.py index 7b1f436d..90493e13 100644 --- a/tests/test_rewrite.py +++ b/tests/test_rewrite.py @@ -221,39 +221,41 @@ def _sdm(self, freq): pt.Evaluate(br_sdm.transform(t), dataset.get_qrels(), metrics=["map"])["map"], places=4) - def test_rm3(self): - dataset = pt.datasets.get_dataset("vaswani") - indexref = dataset.get_index() - - qe = pt.rewrite.RM3(indexref) - br = pt.BatchRetrieve(indexref) - - queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) - res = br.transform(queriesIn) - - queriesOut = qe.transform(res) - self.assertEqual(len(queriesOut), 1) - query = queriesOut.iloc[0]["query"] - #self.assertTrue("compact^1.82230972" in query) - self.assertTrue("applypipeline:off " in query) + # RM3 cannot be tested with current jnius, as it must be placed into the boot classpath + # As workaround for the moment, those RM3 tests are implemented in a single file tests/test_rewrite_rm3.py that is skipped when executing the complete pipeline, but are executed when run in isolation. + # def test_rm3(self): + # dataset = pt.datasets.get_dataset("vaswani") + # indexref = dataset.get_index() + + # qe = pt.rewrite.RM3(indexref) + # br = pt.BatchRetrieve(indexref) + + # queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + # res = br.transform(queriesIn) + + # queriesOut = qe.transform(res) + # self.assertEqual(len(queriesOut), 1) + # query = queriesOut.iloc[0]["query"] + # #self.assertTrue("compact^1.82230972" in query) + # self.assertTrue("applypipeline:off " in query) - pipe = br >> qe >> br + # pipe = br >> qe >> br - # lets go faster, we only need 18 topics. qid 16 had a tricky case - t = dataset.get_topics().head(18) + # # lets go faster, we only need 18 topics. qid 16 had a tricky case + # t = dataset.get_topics().head(18) - all_qe_res = pipe.transform(t) - map_pipe = pt.Evaluate(all_qe_res, dataset.get_qrels(), metrics=["map"])["map"] + # all_qe_res = pipe.transform(t) + # map_pipe = pt.Evaluate(all_qe_res, dataset.get_qrels(), metrics=["map"])["map"] - br_qe = pt.BatchRetrieve(indexref, - controls={"qe":"on"}, - properties={"querying.processes" : "terrierql:TerrierQLParser,parsecontrols:TerrierQLToControls,"\ - +"parseql:TerrierQLToMatchingQueryTerms,matchopql:MatchingOpQLParser,applypipeline:ApplyTermPipeline,"\ - +"sd:DependenceModelPreProcess,localmatching:LocalManager$ApplyLocalMatching,qe:RM3,"\ - +"labels:org.terrier.learning.LabelDecorator,filters:LocalManager$PostFilterProcess"}) - map_qe = pt.Evaluate(br_qe.transform(t), dataset.get_qrels(), metrics=["map"])["map"] + # br_qe = pt.BatchRetrieve(indexref, + # controls={"qe":"on"}, + # properties={"querying.processes" : "terrierql:TerrierQLParser,parsecontrols:TerrierQLToControls,"\ + # +"parseql:TerrierQLToMatchingQueryTerms,matchopql:MatchingOpQLParser,applypipeline:ApplyTermPipeline,"\ + # +"sd:DependenceModelPreProcess,localmatching:LocalManager$ApplyLocalMatching,qe:RM3,"\ + # +"labels:org.terrier.learning.LabelDecorator,filters:LocalManager$PostFilterProcess"}) + # map_qe = pt.Evaluate(br_qe.transform(t), dataset.get_qrels(), metrics=["map"])["map"] - self.assertAlmostEqual(map_qe, map_pipe, places=2) + # self.assertAlmostEqual(map_qe, map_pipe, places=2) def test_linear_terrierql(self): pipe = pt.apply.query(lambda row: "az") >> pt.rewrite.linear(0.75, 0.25) diff --git a/tests/test_rewrite_rm3.py b/tests/test_rewrite_rm3.py new file mode 100644 index 00000000..21f0a208 --- /dev/null +++ b/tests/test_rewrite_rm3.py @@ -0,0 +1,174 @@ +import pandas as pd +import pyterrier as pt +import os +from matchpy import * +from .base import TempDirTestCase +import pytest + +if not pt.java.started(): + terrier_version = os.environ.get("TERRIER_VERSION", None) + terrier_helper_version = os.environ.get("TERRIER_HELPER_VERSION", None) + pt.java.set_log_level('DEBUG') + pt.terrier.set_version(terrier_version) + pt.terrier.set_helper_version(terrier_helper_version) + pt.terrier.set_prf_version('rm_tiebreak-SNAPSHOT') + pt.java.init() # optional, forces java initialisation + TERRIER_PRF_ON_CLASSPATH = True +else: + TERRIER_PRF_ON_CLASSPATH = False + + +def normalize_term_weights(term_weights, digits=7): + ret = '' + for i in term_weights.split(): + if '^' in i: + i = i.split('^') + i = i[0] + '^' + i[1][:digits] + ret += ' ' + i + return ret.strip() + +class TestRewriteRm3(TempDirTestCase): + """This is a set of unit tests for RM3 that can currently not run in the complete test suite, as the "com.github.terrierteam:terrier-prf:-SNAPSHOT" would have to be added to the boot classpath. + As workaround, the RM3 tests that can not be executed within the complete test suite are added to this dedicated file, so that they can be executed in isolation by running pytest tests/test_rewrite_rm3.py. + """ + + @pytest.mark.skipif(not TERRIER_PRF_ON_CLASSPATH, reason="This test only works in isolation when terrier-prf is on the jnius classpath.") + def test_rm3_expansion_for_query_compact_on_tf_idf(self): + # top-retrieval results of TF-IDF and BM25 below change, so the RM3 weights differ + expected = 'applypipeline:off equip^0.037346367 ferrit^0.027371584 modul^0.027371584 suppli^0.037346367 design^0.056739070 microwav^0.027371584 anod^0.037346367 unit^0.037346367 compact^0.674414337 stabil^0.037346367' + + indexref = pt.datasets.get_dataset("vaswani").get_index() + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + + qe = pt.rewrite.RM3(indexref) + br = pt.BatchRetrieve(indexref, wmodel='TF_IDF') + + actual = qe.transform(br.transform(queriesIn)) + + self.assertEqual(len(actual), 1) + self.assertEqual(normalize_term_weights(expected), normalize_term_weights(actual.iloc[0]["query"])) + + @pytest.mark.skipif(not TERRIER_PRF_ON_CLASSPATH, reason="This test only works in isolation when terrier-prf is on the jnius classpath.") + def test_rm3_expansion_for_query_compact_on_bm25(self): + # top-retrieval results of BM25 and TF-IDF above change, so the RM3 weights differ + expected = 'applypipeline:off equip^0.040264644 ferrit^0.025508024 modul^0.025508024 suppli^0.040264644 design^0.051008239 microwav^0.025508024 anod^0.040264644 unit^0.040264644 compact^0.671144485 stabil^0.040264644' + + indexref = pt.datasets.get_dataset("vaswani").get_index() + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + + qe = pt.rewrite.RM3(indexref) + br = pt.BatchRetrieve(indexref, wmodel='BM25') + + actual = qe.transform(br.transform(queriesIn)) + + self.assertEqual(len(actual), 1) + self.assertEqual(normalize_term_weights(expected), normalize_term_weights(actual.iloc[0]["query"])) + + @pytest.mark.skipif(not TERRIER_PRF_ON_CLASSPATH, reason="This test only works in isolation when terrier-prf is on the jnius classpath.") + def test_axiomatic_qe_expansion_for_query_compact_on_bm25(self): + # just ensure that AxiomaticQE results do not change + expected = 'applypipeline:off compact^1.000000000' + + indexref = pt.datasets.get_dataset("vaswani").get_index() + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + + qe = pt.rewrite.AxiomaticQE(indexref) + br = pt.BatchRetrieve(indexref, wmodel='BM25') + + actual = qe.transform(br.transform(queriesIn)) + + self.assertEqual(len(actual), 1) + self.assertEqual(expected, actual.iloc[0]["query"]) + + def test_kl_qe_expansion_for_query_compact_on_bm25(self): + # just ensure that KLQueryExpansion results do not change + expected = 'applypipeline:off compact^1.840895333 design^0.348370740 equip^0.000000000 purpos^0.000000000 instrument^0.000000000 ferrit^0.000000000 anod^0.000000000 aircraft^0.000000000 microwav^0.000000000 sideband^0.000000000' + + indexref = pt.datasets.get_dataset("vaswani").get_index() + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + + qe = pt.rewrite.KLQueryExpansion(indexref) + br = pt.BatchRetrieve(indexref, wmodel='BM25') + + actual = qe.transform(br.transform(queriesIn)) + + self.assertEqual(len(actual), 1) + self.assertEqual(expected, actual.iloc[0]["query"]) + + def test_bo1_qe_expansion_for_query_compact_on_bm25(self): + # just ensure that Bo1QueryExpansion results do not change + expected = 'applypipeline:off compact^1.822309726 design^0.287992096 equip^0.000000000 purpos^0.000000000 instrument^0.000000000 ferrit^0.000000000 anod^0.000000000 aircraft^0.000000000 microwav^0.000000000 sideband^0.000000000' + + indexref = pt.datasets.get_dataset("vaswani").get_index() + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + + qe = pt.rewrite.Bo1QueryExpansion(indexref) + br = pt.BatchRetrieve(indexref, wmodel='BM25') + + actual = qe.transform(br.transform(queriesIn)) + + self.assertEqual(len(actual), 1) + self.assertEqual(expected, actual.iloc[0]["query"]) + + def test_dfr_qe_expansion_for_query_compact_on_bm25(self): + # just ensure that DFRQueryExpansion results do not change + expected = 'applypipeline:off compact^1.822309726 design^0.287992096 equip^0.000000000 purpos^0.000000000 instrument^0.000000000 ferrit^0.000000000 anod^0.000000000 aircraft^0.000000000 microwav^0.000000000 sideband^0.000000000' + + indexref = pt.datasets.get_dataset("vaswani").get_index() + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + + qe = pt.rewrite.DFRQueryExpansion(indexref) + br = pt.BatchRetrieve(indexref, wmodel='BM25') + + actual = qe.transform(br.transform(queriesIn)) + + self.assertEqual(len(actual), 1) + self.assertEqual(expected, actual.iloc[0]["query"]) + + @pytest.mark.skipif(not TERRIER_PRF_ON_CLASSPATH, reason="This test only works in isolation when terrier-prf is on the jnius classpath.") + def test_rm3_end_to_end(self): + """An end-to-end test, contrasting the smaller tests (that fail faster) from above. + """ + dataset = pt.datasets.get_dataset("vaswani") + indexref = dataset.get_index() + + qe = pt.rewrite.RM3(indexref) + br = pt.BatchRetrieve(indexref) + + queriesIn = pd.DataFrame([["1", "compact"]], columns=["qid", "query"]) + res = br.transform(queriesIn) + + queriesOut = qe.transform(res) + self.assertEqual(len(queriesOut), 1) + query = queriesOut.iloc[0]["query"] + #self.assertTrue("compact^1.82230972" in query) + self.assertTrue("applypipeline:off " in query) + + pipe = br >> qe >> br + + # lets go faster, we only need 18 topics. qid 16 had a tricky case + t = dataset.get_topics().head(18) + + all_qe_res = pipe.transform(t) + map_pipe = pt.Evaluate(all_qe_res, dataset.get_qrels(), metrics=["map"])["map"] + + br_qe = pt.BatchRetrieve(indexref, + controls={"qe":"on"}, + properties={"querying.processes" : "terrierql:TerrierQLParser,parsecontrols:TerrierQLToControls,"\ + +"parseql:TerrierQLToMatchingQueryTerms,matchopql:MatchingOpQLParser,applypipeline:ApplyTermPipeline,"\ + +"sd:DependenceModelPreProcess,localmatching:LocalManager$ApplyLocalMatching,qe:RM3,"\ + +"labels:org.terrier.learning.LabelDecorator,filters:LocalManager$PostFilterProcess"}) + map_qe = pt.Evaluate(br_qe.transform(t), dataset.get_qrels(), metrics=["map"])["map"] + + self.assertAlmostEqual(map_qe, map_pipe, places=2) + + @pytest.mark.skipif(not TERRIER_PRF_ON_CLASSPATH, reason="This test only works in isolation when terrier-prf is on the jnius classpath.") + def test_scoring_rm3_qe(self): + expected = 'applypipeline:off fox^0.600000024' + input = pd.DataFrame([["q1", "fox", "d1", "all the fox were fox", 3], ["q1", "fox", "d2", "brown fox jumps", 2]], columns=["qid", "query", "docno", "body", "score"]) + scorer = pt.terrier.retriever.TextIndexProcessor(pt.rewrite.RM3, takes="docs", returns="queries") + rtr = scorer(input) + self.assertTrue("qid" in rtr.columns) + self.assertTrue("query" in rtr.columns) + self.assertTrue("docno" not in rtr.columns) + self.assertTrue(expected, rtr.iloc[0]["query"])