Skip to content

Commit

Permalink
Addresses #227: max_passage() and the other passage aggregators (first_passage(), mean_passage(), kmaxavg_passage()) should drop the docid column
Browse files Browse the repository at this point in the history
  • Loading branch information
cmacdonald committed Mar 9, 2022
1 parent 6016eca commit b0c685a
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 1 deletion.
2 changes: 2 additions & 0 deletions pyterrier/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,8 @@ def transform(self, topics_and_res):
#add query columns back
rtr = rtr.merge(topics_and_res[query_columns(topics_and_res)].drop_duplicates(), on='qid')

if "docid" in rtr.columns:
rtr = rtr.drop(columns=['docid'])
rtr = add_ranks(rtr)
return rtr

Expand Down
6 changes: 5 additions & 1 deletion tests/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def test_passager(self):
self.assertEqual("sentence", dfoutput["body"][1])

def test_depassager(self):
dfinput = pd.DataFrame([["q1", "a query", "doc1", "title", "body sentence"]], columns=["qid", "query", "docno", "title", "body"])
dfinput = pd.DataFrame([["q1", "a query", "doc1", 1, "title", "body sentence"]], columns=["qid", "query", "docno", "docid", "title", "body"])
passager = pt.text.sliding(length=1, stride=1)
dfpassage = passager(dfinput)
# qid query docno body
Expand All @@ -131,6 +131,7 @@ def test_depassager(self):
self.assertEqual(1, dfmax["score"][0])
self.assertTrue("query" in dfmax.columns)
self.assertTrue("body" in dfmax.columns)
self.assertFalse("docid" in dfmax.columns)
self.assertFalse("pid" in dfmax.columns)
self.assertFalse("olddocno" in dfmax.columns)
self.assertEqual("a query", dfmax["query"][0])
Expand All @@ -140,18 +141,21 @@ def test_depassager(self):
self.assertEqual(1, dffirst["score"][0])
self.assertTrue("body" in dffirst.columns)
self.assertFalse("pid" in dffirst.columns)
self.assertFalse("docid" in dffirst.columns)
self.assertFalse("olddocno" in dffirst.columns)

dfmean = pt.text.mean_passage()(dfpassage)
self.assertEqual(1, len(dfmean))
self.assertEqual(0.5, dfmean["score"][0])
self.assertFalse("pid" in dfmean.columns)
self.assertFalse("docid" in dfmean.columns)
self.assertFalse("olddocno" in dfmax.columns)

dfmeanK = pt.text.kmaxavg_passage(2)(dfpassage)
self.assertEqual(1, len(dfmeanK))
self.assertEqual(0.5, dfmeanK["score"][0])
self.assertFalse("pid" in dfmeanK.columns)
self.assertFalse("docid" in dfmeanK.columns)
self.assertFalse("olddocno" in dfmeanK.columns)

dfscores = pd.DataFrame([
Expand Down

0 comments on commit b0c685a

Please sign in to comment.