diff --git a/docs/transformer.rst b/docs/transformer.rst index 842e0462..bbc00023 100644 --- a/docs/transformer.rst +++ b/docs/transformer.rst @@ -55,9 +55,33 @@ This class is the base class for all transformers. .. autoclass:: pyterrier.Transformer :members: -Moreover, by extending Transformer, all transformer implementations gain the necessary "dunder" methods (e.g. ``__rshift__()``) + +Default Method +,,,,,,,,,,,,,,,, + +You can invoke a transformer's transform method simply by calling the default method. If ``t`` is a transformer:: + + df_in = pt.new.queries(['test query'], qid=['q1']) + df_out = t.transform(df_in) + df_out = t(df_in) + +The default method can also detect iterable dictionaries, and pass those directly to ``transform_iter()`` +(which typically calls ``transform()``). So the following expression is equivalent to the examples in the +previous code block:: + + df_out = t([{'qid' : 'q1', 'query' : 'test query'}]) + +This can be more succinct than creating new dataframes for testing transformer implementations. + + +Operator Support +,,,,,,,,,,,,,,,, + +By extending Transformer, all transformer implementations gain the necessary "dunder" methods (e.g. ``__rshift__()``) to support the transformer operators (`>>`, `+` etc). NB: This class used to be called ``pyterrier.transformer.TransformerBase`` + + .. _pt.transformer.estimator: Estimator diff --git a/pyterrier/transformer.py b/pyterrier/transformer.py index e379834f..38978227 100644 --- a/pyterrier/transformer.py +++ b/pyterrier/transformer.py @@ -98,7 +98,8 @@ def transform(self, topics_or_res : pd.DataFrame) -> pd.DataFrame: def transform_iter(self, input: Iterable[dict]) -> pd.DataFrame: """ Method that proesses an iter-dict by instantiating it as a dataframe and calling transform(). - Returns the DataFrame returned by transform(). Used in the implementation of index() on a composed + Returns the DataFrame returned by transform(). 
This can be a handier version of transform() + that avoids constructing a dataframe by hand. Also used in the implementation of index() on a composed pipeline. """ return self.transform(pd.DataFrame(list(input))) @@ -213,11 +214,14 @@ def set_parameter(self, name : str, value): raise ValueError(('Invalid parameter name %s for transformer %s. '+ 'Check the list of available parameters') %(name, str(self))) - def __call__(self, *args, **kwargs) -> pd.DataFrame: + def __call__(self, input : Union[pd.DataFrame, Iterable[dict]]) -> pd.DataFrame: """ - Sets up a default method for every transformer, which is aliased to transform(). + Sets up a default method for every transformer, which is aliased to transform() (for DataFrames) + or transform_iter() (for iterable dictionaries) depending on the type of input. """ - return self.transform(*args, **kwargs) + if isinstance(input, pd.DataFrame): + return self.transform(input) + return self.transform_iter(input) def __rshift__(self, right) -> 'Transformer': from .ops import ComposedPipeline diff --git a/tests/test_transformer.py b/tests/test_transformer.py index 181b4235..9006f6df 100644 --- a/tests/test_transformer.py +++ b/tests/test_transformer.py @@ -7,6 +7,14 @@ class TestTransformer(BaseTestCase): + def test_call(self): + inputDocs = pt.new.ranked_documents([[2, 1], [2]], qid=["q100", "q10"]) + t = pt.Transformer.from_df(inputDocs) + self.assertEqual(2, len(t(pt.new.queries(['a'], qid=['q100'])))) + self.assertEqual(1, len(t(pt.new.queries(['a'], qid=['q10'])))) + self.assertEqual(2, len(t([{'qid' : 'q100'}]))) + self.assertEqual(1, len(t([{'qid' : 'q10'}]))) + def test_is_transformer(self): class MyTransformer1(pt.Transformer): pass