feat (experimental) added new prompt and metric into `ragas.experimental` (#1240)

You can use it like this:
```py
from ragas import evaluate
from ragas.experimental.metrics import FaithfulnessExperimental
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import faithfulness


# `gpt4o` is a LangChain chat model instance (see the setup sketch below this block)
f = FaithfulnessExperimental(llm=LangchainLLMWrapper(gpt4o))
faithfulness.llm = LangchainLLMWrapper(gpt4o)

# optional: score a single row directly (await inside an async context)
# row = amnesty_qa["eval"][0]
# await f.ascore(row)
# await faithfulness.ascore(row)

r = evaluate(
    amnesty_qa["eval"].select(range(10)),
    metrics=[f, faithfulness],
    raise_exceptions=True,
    callbacks=[]
)
```
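
For context, here is a minimal setup sketch for the two names the snippet assumes, `gpt4o` and `amnesty_qa`. The specific model and dataset below are assumptions, not part of this commit; any LangChain chat model and any evaluation dataset with `question`, `answer`, and `contexts` columns should work the same way.
```py
# setup sketch (assumed, not part of this commit)
from datasets import load_dataset
from langchain_openai import ChatOpenAI

# any LangChain chat model works; gpt-4o is just an example
gpt4o = ChatOpenAI(model="gpt-4o")

# the amnesty_qa eval set used in the ragas docs; any dataset with
# "question", "answer" and "contexts" columns would do
amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2")
```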
jjmachan committed Sep 3, 2024
1 parent 68d52b9 commit fe379a1
Showing 6 changed files with 48 additions and 41 deletions.
22 changes: 9 additions & 13 deletions src/experimental/tests/test_prompt.py
@@ -1,11 +1,11 @@
from ragas_experimental.llms.prompt import StringPrompt, StringIO
import pytest
from langchain_core.outputs import Generation, LLMResult
from ragas_experimental.llms.prompt import StringIO, StringPrompt

from ragas.llms.base import BaseRagasLLM
from langchain_core.outputs import LLMResult, Generation
from ragas.llms.prompt import PromptValue
from ragas.run_config import RunConfig

import pytest


class EchoLLM(BaseRagasLLM):
def generate_text( # type: ignore
@@ -37,10 +37,11 @@ async def test_string_prompt():


def test_process_fields():
from ragas_experimental.llms.prompt import PydanticPrompt, StringIO
from pydantic import BaseModel
from enum import Enum

from pydantic import BaseModel
from ragas_experimental.llms.prompt import PydanticPrompt, StringIO

class Categories(str, Enum):
science = "science"
commerce = "commerce"
@@ -63,10 +63,7 @@ class JokeGenerator(PydanticPrompt[InputModel, StringIO]):

@pytest.mark.asyncio
async def test_pydantic_prompt_io():
from ragas_experimental.llms.prompt import (
PydanticPrompt,
StringIO,
)
from ragas_experimental.llms.prompt import PydanticPrompt, StringIO

class Prompt(PydanticPrompt[StringIO, StringIO]):
instruction = ""
@@ -82,9 +80,7 @@ class Prompt(PydanticPrompt[StringIO, StringIO]):


def test_pydantic_prompt_examples():
from ragas_experimental.llms.prompt import (
PydanticPrompt,
)
from ragas_experimental.llms.prompt import PydanticPrompt

class Prompt(PydanticPrompt[StringIO, StringIO]):
instruction = ""
Empty file.
@@ -1,21 +1,21 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
import json
import typing as t
from abc import ABC, abstractmethod

from ragas.llms.output_parser import RagasoutputParser
from ragas.llms.prompt import PromptValue
import pydantic

# Check Pydantic version
from pydantic import BaseModel
import pydantic

from ragas.llms.output_parser import RagasoutputParser
from ragas.llms.prompt import PromptValue

if t.TYPE_CHECKING:
from ragas.llms.base import BaseRagasLLM
from langchain_core.callbacks import Callbacks

from ragas.llms.base import BaseRagasLLM

PYDANTIC_V2 = pydantic.VERSION.startswith("2.")


@@ -24,7 +24,7 @@ def __init__(self, llm):
self.llm: BaseRagasLLM = llm

@abstractmethod
async def generate(self, data: t.Any) -> t.Any:
async def generate(self, data: t.Any, callbacks: Callbacks = None) -> t.Any:
pass


@@ -57,12 +57,14 @@ def to_json(model: t.Any, indent: int = 4) -> str:
return model.json(indent=indent)


def model_to_json_schema(model: t.Type[BaseModel]) -> dict:
def model_to_json_schema(model: t.Type[BaseModel]) -> str:
if PYDANTIC_V2:
return model.model_json_schema()
# NOTE: this is not the same as model.schema_json()
return model.model_json_schema() # type: ignore
else:
return model.schema_json()


InputModel = t.TypeVar("InputModel", bound=BaseModel)
OutputModel = t.TypeVar("OutputModel", bound=BaseModel)

@@ -96,9 +98,11 @@ def generate_examples(self):
example_strings.append(
self.instruction
+ "\n"
+ "input: " + to_json(input_data, indent=4)
+ "input: "
+ to_json(input_data, indent=4)
+ "\n"
+ "output: " + to_json(output_data, indent=4)
+ "output: "
+ to_json(output_data, indent=4)
)

return (
@@ -118,12 +122,15 @@ def to_string(self, data: InputModel) -> str:
+ "\n"
+ self.generate_examples()
+ "\nNow perform the above instruction with the following input\n"
+ "input: " + to_json(data, indent=4)
+ "input: "
+ to_json(data, indent=4)
+ "\n"
+ "output: "
)

async def generate(self, data: InputModel, callbacks: Callbacks) -> OutputModel:
async def generate(
self, data: InputModel, callbacks: Callbacks = None
) -> OutputModel:
prompt_value = PromptValue(prompt_str=self.to_string(data))
resp = await self.llm.generate(prompt_value, callbacks=callbacks)
resp_text = resp.generations[0][0].text
@@ -135,7 +142,7 @@ async def generate(self, data: InputModel, callbacks: Callbacks) -> OutputModel:


class StringPrompt(BasePrompt):
async def generate(self, data: str) -> str:
async def generate(self, data: str, callbacks: Callbacks = None) -> str:
prompt_value = PromptValue(prompt_str=data)
llm_result = await self.llm.agenerate_text(prompt_value)
return llm_result.generations[0][0].text
llm_result = await self.llm.agenerate_text(prompt_value, callbacks=callbacks)
return llm_result.generations[0][0].text
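
With this change `callbacks` is optional on both `StringPrompt.generate` and `PydanticPrompt.generate`. Below is a minimal sketch of the new call shape. It assumes the `ragas.experimental` import path used in the commit message (the test suite uses the `ragas_experimental` package name instead), that `BaseRagasLLM` accepts a `RunConfig` in its constructor as the test imports suggest, and an echo-style stand-in LLM modeled on the test double above.
```py
import asyncio

from langchain_core.outputs import Generation, LLMResult

from ragas.experimental.llms.prompt import StringPrompt
from ragas.llms.base import BaseRagasLLM
from ragas.llms.prompt import PromptValue
from ragas.run_config import RunConfig


class EchoLLM(BaseRagasLLM):
    """Stand-in LLM that returns the prompt text unchanged (mirrors the test double above)."""

    def generate_text(self, prompt: PromptValue, *args, **kwargs) -> LLMResult:  # type: ignore
        return LLMResult(generations=[[Generation(text=prompt.prompt_str)]])

    async def agenerate_text(self, prompt: PromptValue, *args, **kwargs) -> LLMResult:  # type: ignore
        return LLMResult(generations=[[Generation(text=prompt.prompt_str)]])


async def main() -> None:
    prompt = StringPrompt(llm=EchoLLM(run_config=RunConfig()))
    # callbacks now defaults to None, so it can simply be omitted
    print(await prompt.generate("state the capital of France"))


asyncio.run(main())
```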
@@ -1,3 +1,3 @@
from ._faithfulness import FaithfulnessExperimental

__all__ = ["FaithfulnessExperimental"]
__all__ = ["FaithfulnessExperimental"]
@@ -1,17 +1,18 @@
from __future__ import annotations

import typing as t
import logging
import typing as t
from dataclasses import dataclass

from pydantic import BaseModel, Field
import numpy as np
from pydantic import BaseModel, Field

from ragas.experimental.llms.prompt import PydanticPrompt
from ragas.metrics.base import EvaluationMode, MetricWithLLM, get_segmenter
from ragas_experimental.llms.prompt import PydanticPrompt

if t.TYPE_CHECKING:
from langchain_core.callbacks import Callbacks

from ragas.metrics._faithfulness import HasSegmentMethod


@@ -187,6 +188,8 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
answer, question, contexts = row["answer"], row["question"], row["contexts"]

# get the sentences from the answer
if self.sentence_segmenter is None:
raise ValueError("Sentence segmenter is not set")
sentences = self.sentence_segmenter.segment(answer)
# TODO: why do we do this?
sentences = [
@@ -198,9 +201,9 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
answer=answer,
sentences={i: sentence for i, sentence in enumerate(sentences)},
),
callbacks=callbacks
callbacks=callbacks,
)

statements = [
statement
for component in sentence_components.sentences
@@ -211,9 +214,9 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
context="\n".join(contexts),
statements=statements,
),
callbacks=callbacks
callbacks=callbacks,
)

# compute the score
num_faithful_statements = sum(
verdict.verdict for verdict in verdicts.statements
@@ -223,4 +226,3 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
else:
score = np.nan
return score

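
For reference, the single-row path that exercises `_ascore` above is the commented-out call from the commit message. A sketch follows, reusing `f` and `amnesty_qa` from the usage snippet at the top and run inside an async context (e.g. a notebook cell); it assumes the dataset rows carry the `question`, `answer`, and `contexts` keys that `_ascore` reads.
```py
# reuses `f` (FaithfulnessExperimental) and `amnesty_qa` from the usage snippet above
row = amnesty_qa["eval"][0]   # one evaluation row: question, answer, contexts
score = await f.ascore(row)   # float score, or np.nan if no statements were produced
print(score)
```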
6 changes: 4 additions & 2 deletions src/ragas/llms/base.py
@@ -63,7 +63,8 @@ def generate_text(
temperature: float = 1e-8,
stop: t.Optional[t.List[str]] = None,
callbacks: Callbacks = None,
) -> LLMResult: ...
) -> LLMResult:
...

@abstractmethod
async def agenerate_text(
@@ -73,7 +74,8 @@ async def agenerate_text(
temperature: t.Optional[float] = None,
stop: t.Optional[t.List[str]] = None,
callbacks: Callbacks = None,
) -> LLMResult: ...
) -> LLMResult:
...

async def generate(
self,
