Evals

Data structures for fed_rag.evals

BenchmarkExample ¶

Bases: BaseModel

Benchmark example data class.

This class represents a single benchmark example.

Attributes:

Name	Type	Description
`query(str)`		Query string.
`response(str)`		Response string.
`context(str\|None)`		Context string.

Source code in src/fed_rag/data_structures/evals.py

class BenchmarkExample(BaseModel):
    """A single example of a benchmark.

    Holds the query and response strings of one benchmark example, plus an
    optional context string.

    Attributes:
        query(str): Query string.
        response(str): Response string.
        context(str|None): Context string. Defaults to ``None``.
    """

    query: str
    response: str
    context: str | None = None

BenchmarkResult ¶

Bases: BaseModel

Benchmark result data class.

This class represents the result of a benchmark.

Attributes:

Name	Type	Description
`score(float)`		Score obtained on the benchmark.
`metric_name(str)`		Name of the metric used for scoring.
`num_examples_used(int)`		Number of examples used for scoring.
`num_total_examples(int)`		Number of total examples in the benchmark.
`evaluations_file(str\|None)`		Path to the evaluations file.

Source code in src/fed_rag/data_structures/evals.py

class BenchmarkResult(BaseModel):
    """Benchmark result data class.

    This class represents the result of a benchmark.

    Attributes:
        score(float): Score obtained on the benchmark.
        metric_name(str): Name of the metric used for scoring.
        num_examples_used(int): Number of examples used for scoring.
        num_total_examples(int): Number of total examples in the benchmark.
        evaluations_file(str|None): Path to the evaluations file. Defaults to
            ``None``, so it may be omitted when there is no such file.
    """

    score: float
    metric_name: str
    num_examples_used: int
    num_total_examples: int
    # Explicit default: without it Pydantic treats a `str | None` field as
    # required. This mirrors `BenchmarkExample.context`, which also defaults
    # to None.
    evaluations_file: str | None = None

BenchmarkEvaluatedExample ¶

Bases: BaseModel

Evaluated benchmark example data class.

This class represents an evaluated benchmark example.

Attributes:

Name	Type	Description
`score(float)`		Score of the benchmark example.
`example(BenchmarkExample)`		Benchmark example.
`rag_response(RAGResponse)`		RAG response.

Source code in src/fed_rag/data_structures/evals.py

class BenchmarkEvaluatedExample(BaseModel):
    """Evaluated benchmark example data class.

    Groups a benchmark example with its score and the RAG response.

    Attributes:
        score(float): Score of the benchmark example.
        example(BenchmarkExample): Benchmark example.
        rag_response(RAGResponse): RAG response.
    """

    score: float
    example: BenchmarkExample
    rag_response: RAGResponse

    def model_dump_json_without_embeddings(self) -> str:
        """Dump the model to JSON, leaving out source-node embeddings.

        Returns:
            str: Output of ``model_dump_json`` with the ``embedding`` field of
                the ``node`` of every ``rag_response.source_nodes`` item
                excluded.
        """
        # The "__all__" key applies the nested exclusion to every item of
        # `source_nodes` rather than to a single index.
        per_source_node = {"node": {"embedding"}}
        rag_response_spec = {"source_nodes": {"__all__": per_source_node}}
        return self.model_dump_json(exclude={"rag_response": rag_response_spec})

model_dump_json_without_embeddings ¶

model_dump_json_without_embeddings()

Generates and returns a JSON representation of the model, excluding the `embedding` field of the `node` of every source node in `rag_response`.

Source code in src/fed_rag/data_structures/evals.py

def model_dump_json_without_embeddings(self) -> str:
    """Dump the model to JSON, leaving out source-node embeddings.

    Returns:
        str: Output of ``model_dump_json`` with the ``embedding`` field of
            the ``node`` of every ``rag_response.source_nodes`` item excluded.
    """
    # The "__all__" key applies the nested exclusion to every item of
    # `source_nodes` rather than to a single index.
    per_source_node = {"node": {"embedding"}}
    rag_response_spec = {"source_nodes": {"__all__": per_source_node}}
    return self.model_dump_json(exclude={"rag_response": rag_response_spec})

AggregationMode ¶

Bases: str, Enum

Mode for aggregating evaluation scores.

This enum defines the available modes for aggregating multiple evaluation scores into a single value.

Attributes:

Name	Type	Description
`AVG`		Calculates the arithmetic mean of the scores.
`SUM`		Calculates the sum of all scores.
`MAX`		Takes the maximum score value.
`MIN`		Takes the minimum score value.

Source code in src/fed_rag/data_structures/evals.py

class AggregationMode(str, Enum):
    """Ways of aggregating evaluation scores.

    Each member names one mode for aggregating multiple evaluation scores
    into a single value. Members are also ``str`` instances whose values are
    the lowercase mode names.

    Attributes:
        AVG: Arithmetic mean of the scores.
        SUM: Sum of all scores.
        MAX: Maximum score value.
        MIN: Minimum score value.
    """

    AVG = "avg"
    SUM = "sum"
    MAX = "max"
    MIN = "min"