Skip to content

PubMedQA

PubMedQA benchmark

HuggingFacePubMedQA

Bases: HuggingFaceBenchmarkMixin, BaseBenchmark

HuggingFace PubMedQA Benchmark.

PubMedQA is a biomedical question answering dataset where each question can be answered with "yes", "no", or "maybe" based on the given context.

Example schema

    {
        "pubid": "25429730",
        "question": "Are group 2 innate lymphoid cells (ILC2s) increased in chronic rhinosinusitis with nasal polyps or eosinophilia?",
        "context": {
            "contexts": [
                "Chronic rhinosinusitis is a heterogeneous disease with uncertain pathogenesis.",
                "The study aimed to identify ILC2s in sinus mucosa in patients with CRS.",
                "35 patients including 13 with eosinophilic CRS were recruited.",
                "ILC2 frequencies were associated with the presence of nasal polyps and increased blood eosinophilia."
            ],
            "labels": ["label1", "label2", "label3", "label4"],
            "meshes": ["Chronic Disease", "Nasal Polyps", "Immunity, Innate"]
        },
        "long_answer": "Based on our analysis, increased ILC2s are associated with CRS with nasal polyps.",
        "final_decision": "yes"  # or "no" or "maybe"
    }

Source code in src/fed_rag/evals/benchmarks/huggingface/pubmedqa.py
class HuggingFacePubMedQA(HuggingFaceBenchmarkMixin, BaseBenchmark):
    """PubMedQA benchmark loaded from HuggingFace.

    PubMedQA is a biomedical question answering dataset. Every question is
    answered with "yes", "no", or "maybe" based on the accompanying context.
    This class points at the ``qiaojin/PubMedQA`` dataset and defaults to its
    ``pqa_labeled`` configuration.

    Example schema:
        {
            "pubid": "25429730",
            "question": "Are group 2 innate lymphoid cells (ILC2s) increased in chronic rhinosinusitis with nasal polyps or eosinophilia?",
            "context": {
                "contexts": [
                    "Chronic rhinosinusitis is a heterogeneous disease with uncertain pathogenesis.",
                    "The study aimed to identify ILC2s in sinus mucosa in patients with CRS.",
                    "35 patients including 13 with eosinophilic CRS were recruited.",
                    "ILC2 frequencies were associated with the presence of nasal polyps and increased blood eosinophilia."
                ],
                "labels": ["label1", "label2", "label3", "label4"],
                "meshes": ["Chronic Disease", "Nasal Polyps", "Immunity, Innate"]
            },
            "long_answer": "Based on our analysis, increased ILC2s are associated with CRS with nasal polyps.",
            "final_decision": "yes"  # or "no" or "maybe"
        }
    """

    dataset_name = "qiaojin/PubMedQA"
    configuration_name: str = "pqa_labeled"

    def _get_query_from_example(self, example: dict[str, Any]) -> str:
        """Return the example's ``"question"`` field as a string."""
        question = example["question"]
        return str(question)

    def _get_response_from_example(self, example: dict[str, Any]) -> str:
        """Return the example's ``"final_decision"`` field as a string."""
        decision = example["final_decision"]
        return str(decision)

    def _get_context_from_example(self, example: dict[str, Any]) -> str:
        """Flatten the example's ``"context"`` field into a single string.

        Args:
            example: One dataset record. A missing ``"context"`` key is
                treated as an empty dict.

        Returns:
            The context itself when it is already a string. For a dict, the
            space-joined ``"contexts"`` list when present; otherwise every
            value of the dict, space-joined (list values are joined first,
            anything else is passed through ``str``).

        Raises:
            BenchmarkParseError: If the context is neither a dict nor a str.
        """
        raw_context = example.get("context", {})

        # Plain text needs no flattening.
        if isinstance(raw_context, str):
            return raw_context

        if not isinstance(raw_context, dict):
            raise BenchmarkParseError(
                f"Unexpected context type: {type(raw_context)} in example: {example}"
            )

        passages = raw_context.get("contexts")
        if isinstance(passages, list):
            return " ".join(passages)

        # No usable "contexts" list: fall back to stitching together every
        # value stored in the context dict.
        fragments = []
        for value in raw_context.values():
            if isinstance(value, list):
                fragments.append(" ".join(value))
            else:
                fragments.append(str(value))
        return " ".join(fragments)

    @model_validator(mode="before")
    @classmethod
    def _validate_extra_installed(cls, data: Any) -> Any:
        """Check the huggingface-evals dependencies, then pass ``data`` through.

        Runs as a "before" validator and returns the incoming ``data``
        unchanged after invoking the install check with this class's name.
        """
        class_name = cls.__name__
        check_huggingface_evals_installed(class_name)
        return data