Evals

Base Benchmark and Benchmarker

BaseBenchmark

Bases: BaseModel, ABC

Base Benchmark.

Source code in src/fed_rag/base/evals/benchmark.py
class BaseBenchmark(BaseModel, ABC):
    """Base Benchmark."""

    _examples: Sequence[BenchmarkExample] = PrivateAttr()

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # give it a sequence interface for accessing examples more easily
    def __getitem__(self, index: int) -> BenchmarkExample:
        return self._examples.__getitem__(index)

    def __len__(self) -> int:
        return self._examples.__len__()

    # shouldn't override Pydantic BaseModel's __iter__
    def as_iterator(self) -> Iterator[BenchmarkExample]:
        return self._examples.__iter__()

    @model_validator(mode="after")
    def set_examples(self) -> "BaseBenchmark":
        self._examples = self._get_examples()
        return self

    # abstractmethods
    @abstractmethod
    def _get_examples(self, **kwargs: Any) -> Sequence[BenchmarkExample]:
        """Method to get examples."""

    @abstractmethod
    def as_stream(self) -> Generator[BenchmarkExample, None, None]:
        """Produce a stream of `BenchmarkExamples`."""

    @property
    @abstractmethod
    def num_examples(self) -> int:
        """Number of examples in the benchmark.

        NOTE: if streaming, `_examples` is likely set to an empty list. Thus,
        we leave this implementation for the subclasses.
        """

num_examples abstractmethod property

num_examples

Number of examples in the benchmark.

NOTE: if streaming, _examples is likely set to an empty list. Thus, we leave this implementation for the subclasses.

as_stream abstractmethod

as_stream()

Produce a stream of BenchmarkExamples.

Source code in src/fed_rag/base/evals/benchmark.py
@abstractmethod
def as_stream(self) -> Generator[BenchmarkExample, None, None]:
    """Produce a stream of `BenchmarkExamples`."""

Base EvaluationMetric

BaseEvaluationMetric

Bases: BaseModel, ABC

Base Evaluation Metric.

Source code in src/fed_rag/base/evals/metric.py
class BaseEvaluationMetric(BaseModel, ABC):
    """Base Data Collator."""

    @abstractmethod
    def __call__(
        self, prediction: str, actual: str, *args: Any, **kwargs: Any
    ) -> float:
        """Evaluate an example prediction against the actual response."""

__call__ abstractmethod

__call__(prediction, actual, *args, **kwargs)

Evaluate an example prediction against the actual response.

Source code in src/fed_rag/base/evals/metric.py
@abstractmethod
def __call__(
    self, prediction: str, actual: str, *args: Any, **kwargs: Any
) -> float:
    """Evaluate an example prediction against the actual response."""