Skip to content

Evals

Data structures for fed_rag.evals

BenchmarkExample

Bases: BaseModel

Benchmark example data class.

Source code in src/fed_rag/data_structures/evals.py
class BenchmarkExample(BaseModel):
    """Benchmark example data class.

    Holds one benchmark record: a required query, a required response, and
    an optional context string.
    """

    # Query text of the example (required).
    query: str
    # Response paired with the query (required). Presumably the reference
    # answer that generated output is scored against -- TODO confirm.
    response: str
    # Optional context accompanying the example; None when not supplied.
    context: str | None = None

BenchmarkResult

Bases: BaseModel

Benchmark result data class.

Source code in src/fed_rag/data_structures/evals.py
class BenchmarkResult(BaseModel):
    """Benchmark result data class.

    Records the outcome of a benchmark run: the score, the metric it was
    computed with, how many examples were used out of the total available,
    and optionally a file associated with the evaluations.
    """

    # Score for the run (presumably aggregated over the used examples --
    # TODO confirm against the producer of this object).
    score: float
    # Name of the metric the score was computed with.
    metric_name: str
    # Count of examples used for this result.
    num_examples_used: int
    # Count of examples in total.
    num_total_examples: int
    # File associated with the evaluations, if any (presumably a path to
    # persisted per-example evaluations -- TODO confirm). An explicit None
    # default makes the field truly optional: without it, pydantic v2 treats
    # a `str | None` annotation as required-but-nullable. This also matches
    # how `BenchmarkExample.context` is declared.
    evaluations_file: str | None = None

BenchmarkEvaluatedExample

Bases: BaseModel

Evaluated benchmark example data class.

Source code in src/fed_rag/data_structures/evals.py
class BenchmarkEvaluatedExample(BaseModel):
    """Evaluated benchmark example data class.

    Bundles a benchmark example with the RAG response produced for it and
    the score assigned to that response.
    """

    score: float
    example: BenchmarkExample
    rag_response: RAGResponse

    def model_dump_json_without_embeddings(self) -> str:
        """Serialize this model to JSON, leaving out node embeddings.

        Returns:
            The JSON string from ``model_dump_json`` with the ``embedding``
            field of each source node's ``node`` excluded.
        """
        # "__all__" applies the nested exclusion to every entry of
        # ``source_nodes`` rather than to one specific index.
        per_source_node = {"node": {"embedding"}}
        rag_response_exclusions = {"source_nodes": {"__all__": per_source_node}}
        exclusions = {"rag_response": rag_response_exclusions}
        return self.model_dump_json(exclude=exclusions)

AggregationMode

Bases: str, Enum

Mode for aggregating evaluation scores.

Source code in src/fed_rag/data_structures/evals.py
class AggregationMode(str, Enum):
    """Mode for aggregating evaluation scores.

    Members also subclass ``str``, so each one compares equal to its
    lowercase string value (e.g. ``AggregationMode.AVG == "avg"``) and can be
    looked up from that value (``AggregationMode("avg")``).
    """

    # The reductions named below are inferred from the member names; how
    # each mode is applied is defined by the code that consumes this enum.
    AVG = "avg"  # presumably the mean of the scores
    SUM = "sum"  # presumably the total of the scores
    MAX = "max"  # presumably the largest score
    MIN = "min"  # presumably the smallest score