Skip to content

Commit

Permalink
temporary fix
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-pmardziel committed Oct 3, 2024
1 parent 8d1a2fe commit 3027a36
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 22 deletions.
11 changes: 10 additions & 1 deletion src/core/trulens/core/database/connector/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,14 +358,21 @@ def get_leaderboard(
self,
app_ids: Optional[List[mod_types_schema.AppID]] = None,
group_by_metadata_key: Optional[str] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
) -> pandas.DataFrame:
"""Get a leaderboard for the given apps.
Args:
app_ids: A list of app ids to filter records by. If empty or not given, all
apps will be included in leaderboard.
group_by_metadata_key: A key included in record metadata that you want to group results by.
limit: Limit on the number of records to return.
offset: Record row offset.
Returns:
DataFrame of apps with their feedback results aggregated.
If group_by_metadata_key is provided, the DataFrame will be grouped by the specified key.
Expand All @@ -374,7 +381,9 @@ def get_leaderboard(
if app_ids is None:
app_ids = []

df, feedback_cols = self.get_records_and_feedback(app_ids)
df, feedback_cols = self.get_records_and_feedback(
app_ids, limit=limit, offset=offset
)
feedback_cols = sorted(feedback_cols)

df["app_name"] = df["app_json"].apply(
Expand Down
73 changes: 53 additions & 20 deletions src/core/trulens/core/database/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from sqlalchemy import Column
from sqlalchemy import Engine
from sqlalchemy import Float
from sqlalchemy import ForeignKey
from sqlalchemy import Text
from sqlalchemy import UniqueConstraint
from sqlalchemy import event
Expand Down Expand Up @@ -117,7 +118,7 @@ class ORM(abc.ABC, Generic[T]):
Dataset: Type[T]


def new_orm(base: Type[T]) -> Type[ORM[T]]:
def new_orm(base: Type[T], prefix: str = "trulens_") -> Type[ORM[T]]:
"""Create a new orm container from the given base table class."""

class NewORM(ORM):
Expand Down Expand Up @@ -222,7 +223,12 @@ class Record(base):
_table_base_name = "records"

record_id = Column(TYPE_ID, nullable=False, primary_key=True)
app_id = Column(TYPE_ID, nullable=False) # foreign key

# Couldn't figure out how to avoid using prefix in ForeignKey here
# and in the other classes below.
app_id = Column(
TYPE_ID, ForeignKey(f"{prefix}apps.app_id"), nullable=False
)

input = Column(Text)
output = Column(Text)
Expand All @@ -235,10 +241,32 @@ class Record(base):
app = relationship(
"AppDefinition",
backref=backref("records", cascade="all,delete"),
primaryjoin="AppDefinition.app_id == Record.app_id",
foreign_keys=app_id,
order_by="(Record.ts,Record.record_id)",
order_by=(ts, record_id),
)
# NOTE(piotrm): order_by: Temporarily disabling all order_by inside
# relationships as it causes a bug in what I think is sqlalchemy
# when used in conjunction with 2 joinedloads and limit. The problem
# is that an SQL query is generated that uses these order_by fields
# without having selected from the table first due to the use of a
# subquery:
"""
```sql
SELECT anon_1..., trulens_apps_1..., trulens_feedbacks_1...,
FROM (
SELECT
trulens_records.record_id AS record_id,
...
FROM trulens_records LIMIT :param_1
) AS anon_1
LEFT OUTER JOIN trulens_apps AS trulens_apps_1 ...
LEFT OUTER JOIN trulens_feedbacks AS trulens_feedbacks_1 ...
ORDER BY trulens_records.ts, trulens_records.record_id
```
"""
# Notice that trulens_records is in a subquery and named "anon_1"
# but then it is used in the ORDER BY outside of the subquery
# referred to by its non-aliased name that was never selected in the
# main query.

@classmethod
def parse(
Expand Down Expand Up @@ -278,10 +306,16 @@ class FeedbackResult(base):
feedback_result_id = Column(
TYPE_ID, nullable=False, primary_key=True
)
record_id = Column(TYPE_ID, nullable=False) # foreign key
record_id = Column(
TYPE_ID,
ForeignKey(f"{prefix}records.record_id"),
nullable=False,
)
feedback_definition_id = Column(
TYPE_ID, nullable=False
) # foreign key
TYPE_ID,
ForeignKey(f"{prefix}feedback_defs.feedback_definition_id"),
nullable=False,
)
last_ts = Column(TYPE_TIMESTAMP, nullable=False)
status = Column(TYPE_ENUM, nullable=False)
error = Column(Text)
Expand All @@ -294,18 +328,16 @@ class FeedbackResult(base):
record = relationship(
"Record",
backref=backref("feedback_results", cascade="all,delete"),
primaryjoin="Record.record_id == FeedbackResult.record_id",
foreign_keys=record_id,
order_by="(FeedbackResult.last_ts,FeedbackResult.feedback_result_id)",
# order_by=(last_ts, feedback_result_id),
)
# See NOTE(piotrm): order_by.

feedback_definition = relationship(
"FeedbackDefinition",
backref=backref("feedback_results", cascade="all,delete"),
primaryjoin="FeedbackDefinition.feedback_definition_id == FeedbackResult.feedback_definition_id",
foreign_keys=feedback_definition_id,
order_by="(FeedbackResult.last_ts,FeedbackResult.feedback_result_id)",
# order_by=(last_ts, feedback_result_id),
)
# See NOTE(piotrm): order_by.

@classmethod
def parse(
Expand Down Expand Up @@ -343,16 +375,17 @@ class GroundTruth(base):
_table_base_name = "ground_truth"

ground_truth_id = Column(TYPE_ID, nullable=False, primary_key=True)
dataset_id = Column(Text, nullable=False)
dataset_id = Column(
Text, ForeignKey(f"{prefix}dataset.dataset_id"), nullable=False
)
ground_truth_json = Column(TYPE_JSON, nullable=False)

dataset = relationship(
"Dataset",
backref=backref("ground_truths", cascade="all,delete"),
primaryjoin="Dataset.dataset_id == GroundTruth.dataset_id",
foreign_keys=dataset_id,
order_by="(GroundTruth.ground_truth_id)",
# order_by=ground_truth_id,
)
# See NOTE(piotrm): order_by

@classmethod
def parse(
Expand Down Expand Up @@ -397,7 +430,7 @@ def parse(
# Without the above, orm class attributes which are defined using backref
# will not be visible, i.e. orm.AppDefinition.records.

# base.registry.configure()
base.registry.configure()

return NewORM

Expand Down Expand Up @@ -451,7 +484,7 @@ def make_orm_for_prefix(

base: Type[T] = new_base(prefix=table_prefix)

return new_orm(base)
return new_orm(base, prefix=table_prefix)


@event.listens_for(Engine, "connect")
Expand Down
2 changes: 2 additions & 0 deletions src/core/trulens/core/database/sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,8 @@ def get_records_and_feedback(

stmt = stmt.limit(limit).offset(offset)

print("stmt=", stmt)

ex = session.execute(stmt).unique()
# unique needed for joinedload above.

Expand Down
12 changes: 11 additions & 1 deletion src/core/trulens/core/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -729,20 +729,30 @@ def get_leaderboard(
self,
app_ids: Optional[List[mod_types_schema.AppID]] = None,
group_by_metadata_key: Optional[str] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
) -> pandas.DataFrame:
"""Get a leaderboard for the given apps.
Args:
app_ids: A list of app ids to filter records by. If empty or not given, all
apps will be included in leaderboard.
group_by_metadata_key: A key included in record metadata that you want to group results by.
limit: Limit on the number of records to return.
offset: Record row offset.
Returns:
Dataframe of apps with their feedback results aggregated.
If group_by_metadata_key is provided, the dataframe will be grouped by the specified key.
"""
return self.connector.get_leaderboard(
app_ids=app_ids, group_by_metadata_key=group_by_metadata_key
app_ids=app_ids,
group_by_metadata_key=group_by_metadata_key,
limit=limit,
offset=offset,
)

def add_ground_truth_to_dataset(
Expand Down

0 comments on commit 3027a36

Please sign in to comment.