-
Notifications
You must be signed in to change notification settings - Fork 65
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* fix empty_Q() * addresses pt.apply.new_column fails on empty dataframes #219 * fixes for version='snapshot' * commit of debug transformer #220
- Loading branch information
1 parent
4ac9221
commit 02c4b64
Showing
9 changed files
with
114 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
.. _pyterrier.debug: | ||
|
||
pyterrier.debug - Transformers for Debugging | ||
-------------------------------------------- | ||
|
||
It's very easy to write complex pipelines with PyTerrier. Sometimes you need to inspect dataframes in the middle of a pipeline. | ||
The pt.debug transformers display the columns or the data, and can be inserted into pipelines during development. | ||
|
||
Debug Methods | ||
============= | ||
|
||
.. automodule:: pyterrier.debug | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ Welcome to PyTerrier's documentation! | |
apply | ||
anserini | ||
new | ||
debug | ||
|
||
.. toctree:: | ||
:maxdepth: 1 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
from .transformer import TransformerBase | ||
from typing import List | ||
|
||
def print_columns(by_query : bool = False, message : str = None) -> TransformerBase:
    """
    Builds a pass-through transformer that prints the column names of the dataframe
    it receives, so the shape of the data can be checked at any stage of a pipeline.
    The dataframe itself is returned unmodified.

    Arguments:
     - by_query(bool): whether to print once per query rather than once for the whole
       dataframe. Defaults to False.
     - message(str): optional text printed before the column names. Defaults to None,
       which means no message. This helps tell the outputs apart when print_columns()
       is used several times within one pipeline.

    Example::

        pipe = (
            bm25
            >> pt.debug.print_columns()
            >> pt.rewrite.RM3()
            >> pt.debug.print_columns()
            >> bm25
        )

    When the above pipeline is executed, two sets of columns will be displayed
     - `["qid", "query", "docno", "rank", "score"]` - the output of BM25, a ranking of documents
     - `["qid", "query", "query_0"]` - the output of RM3, a reformulated query
    """
    # imported lazily, inside the function, as in the rest of this module
    import pyterrier as pt

    def _show_columns(frame):
        # the optional label comes first so the output can be attributed
        if message is not None:
            print(message)
        print(frame.columns)
        # hand the input back untouched so the pipeline continues as before
        return frame

    if by_query:
        return pt.apply.by_query(_show_columns)
    return pt.apply.generic(_show_columns)
|
||
def print_rows(
        by_query : bool = True,
        jupyter: bool = True,
        head : int = 2,
        message : str = None,
        columns : List[str] = None) -> TransformerBase:
    """
    Returns a transformer that can be inserted into pipelines to print some of the rows
    of the dataframe at this stage in the pipeline. The dataframe itself is passed
    through unmodified.

    Arguments:
     - by_query(bool): whether to display for each query. Defaults to True.
     - jupyter(bool): Whether to use IPython's display function to display the dataframe.
       Defaults to True. If IPython cannot be imported, the rows are printed with print()
       instead.
     - head(int): The number of rows to display. Defaults to 2. None means all rows.
     - message(str): optional text printed before the rows. Defaults to None, which means
       no message. This is useful when print_rows() is being used multiple times within
       a pipeline.
     - columns(List[str]): Limit the columns for which data is displayed. Default of None
       displays all columns.

    Example::

        pipe = (
            bm25
            >> pt.debug.print_rows()
            >> pt.rewrite.RM3()
            >> pt.debug.print_rows()
            >> bm25
        )
    """
    import pyterrier as pt

    def _do_print(df):
        if message is not None:
            print(message)
        # restrict only what is rendered; the full dataframe is still returned below
        render = df if head is None else df.head(head)
        if columns is not None:
            render = render[columns]
        if jupyter:
            try:
                from IPython.display import display
            except ImportError:
                # IPython is an optional dependency: a debugging aid should still show
                # the rows rather than abort the pipeline when it is not installed
                display = print
            display(render)
        else:
            print(render)
        return df

    return pt.apply.by_query(_do_print) if by_query else pt.apply.generic(_do_print)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters