Skip to content

Unsloth

Unsloth FastModel Generator

UnslothFastModelGenerator

Bases: UnslothGeneratorMixin, BaseGenerator

Unsloth FastModel Integration.

System Requirements (see https://docs.unsloth.ai/get-started/beginner-start-here/unsloth-requirements):

- Operating System: works on Linux and Windows.
- Supports NVIDIA GPUs from 2018+ with minimum CUDA Capability 7.0 (V100, T4, Titan V, RTX 20/30/40x, A100, H100, L40, etc.). Check your GPU! GTX 1070/1080 work, but are slow.
- Your device must have xformers, torch, bitsandbytes, and triton support.
- Unsloth only works if you have an NVIDIA GPU. Make sure you also have disk space to train and save your model.

Source code in src/fed_rag/generators/unsloth/unsloth_fast_model.py
class UnslothFastModelGenerator(UnslothGeneratorMixin, BaseGenerator):
    """Unsloth FastModel Integration.

    System Requirements (see https://docs.unsloth.ai/get-started/beginner-start-here/unsloth-requirements)
        - Operating System: Works on Linux and Windows.
        - Supports NVIDIA GPUs since 2018+. Minimum CUDA Capability 7.0
        (V100, T4, Titan V, RTX 20, 30, 40x, A100, H100, L40 etc)
        Check your GPU! GTX 1070, 1080 works, but is slow.
        - Your device must have xformers, torch, BitsandBytes and triton support.
        - Unsloth only works if you have a NVIDIA GPU. Make sure you also have disk space to train & save your model
    """

    # "model_" is a pydantic-protected prefix; override the protected namespaces
    # so fields like `model_name` are permitted on this pydantic model.
    model_config = ConfigDict(protected_namespaces=("pydantic_model_",))
    model_name: str = Field(
        description="Name of Unsloth model. Used for loading the model from HF hub or local."
    )
    generation_config: "GenerationConfig" = Field(
        description="The generation config used for generating with the PreTrainedModel."
    )
    load_model_kwargs: dict = Field(
        description="Optional kwargs dict for loading ~unsloth.FastModel.from_pretrained(). Defaults to None.",
        default_factory=dict,
    )
    # Private state: prompt template plus lazily-loaded model/tokenizer.
    _prompt_template: str = PrivateAttr(default=DEFAULT_PROMPT_TEMPLATE)
    _model: Optional[Union["PreTrainedModel", "PeftModel"]] = PrivateAttr(
        default=None
    )
    _tokenizer: UnslothPretrainedTokenizer | None = PrivateAttr(default=None)

    def __init__(
        self,
        model_name: str,
        generation_config: Optional["GenerationConfig"] = None,
        prompt_template: str | None = None,
        load_model_kwargs: dict | None = None,
        load_model_at_init: bool = True,
    ):
        """Create an UnslothFastModelGenerator.

        Args:
            model_name: HF hub id or local path of the Unsloth model.
            generation_config: Config used when generating. Defaults to a
                fresh `GenerationConfig()` when not provided.
            prompt_template: Template used to build prompts. Defaults to
                `DEFAULT_PROMPT_TEMPLATE`.
            load_model_kwargs: Extra kwargs forwarded to
                `FastLanguageModel.from_pretrained()`. Defaults to `{}`.
            load_model_at_init: If True, eagerly load model and tokenizer;
                otherwise they are loaded lazily on first `model` access.
        """
        # if reaches here, then passed checks for extra
        from transformers.generation.utils import GenerationConfig

        generation_config = (
            generation_config if generation_config else GenerationConfig()
        )
        super().__init__(
            model_name=model_name,
            generation_config=generation_config,
            load_model_kwargs=load_model_kwargs if load_model_kwargs else {},
        )
        self._prompt_template = (
            prompt_template if prompt_template else DEFAULT_PROMPT_TEMPLATE
        )
        if load_model_at_init:
            self._model, tokenizer = self._load_model_and_tokenizer()
            self._tokenizer = UnslothPretrainedTokenizer(
                model_name=self.model_name, tokenizer=tokenizer
            )

    @model_validator(mode="before")
    @classmethod
    def check_dependencies(cls, data: Any) -> Any:
        """Validate that unsloth dependencies are installed."""
        check_unsloth_installed(cls.__name__)
        return data

    def _load_model_and_tokenizer(
        self, **kwargs: Any
    ) -> tuple[Union["PreTrainedModel", "PeftModel"], "PreTrainedTokenizer"]:
        """Load model and tokenizer via `FastLanguageModel.from_pretrained`.

        Any `kwargs` are merged into, and persisted on, `load_model_kwargs`
        so subsequent loads reuse the same options.
        """
        from unsloth import FastLanguageModel

        # Merge without mutating the stored dict in place: the merged dict is
        # only persisted once built, so a failed assignment/validation cannot
        # leave `load_model_kwargs` half-updated.
        merged_kwargs = {**self.load_model_kwargs, **kwargs}
        self.load_model_kwargs = merged_kwargs
        model, tokenizer = FastLanguageModel.from_pretrained(
            self.model_name, **merged_kwargs
        )
        return model, tokenizer

    @property
    def model(self) -> Union["PreTrainedModel", "PeftModel"]:
        """The underlying model, loaded lazily (with tokenizer) on first access."""
        if self._model is None:
            # load HF Pretrained Model
            model, tokenizer = self._load_model_and_tokenizer()
            self._model = model
            if self._tokenizer is None:
                self._tokenizer = UnslothPretrainedTokenizer(
                    model_name=self.model_name, tokenizer=tokenizer
                )
        return self._model

    @model.setter
    def model(self, value: Union["PreTrainedModel", "PeftModel"]) -> None:
        self._model = value

    @property
    def tokenizer(self) -> UnslothPretrainedTokenizer | None:
        """The wrapped tokenizer.

        NOTE(review): unlike `model`, this property does not trigger a lazy
        load — it is None until the model has been loaded (e.g. when
        constructed with `load_model_at_init=False`).
        """
        return self._tokenizer

    @tokenizer.setter
    def tokenizer(self, value: UnslothPretrainedTokenizer) -> None:
        self._tokenizer = value

    @property
    def prompt_template(self) -> str:
        """Prompt template used to format generation inputs."""
        return self._prompt_template

    @prompt_template.setter
    def prompt_template(self, value: str) -> None:
        self._prompt_template = value

    def _get_peft_model(self, **kwargs: Any) -> "PeftModel":
        """A light wrapper over `~FastLanguageModel.get_peft_model()`."""
        from unsloth import FastLanguageModel

        model = FastLanguageModel.get_peft_model(self.model, **kwargs)

        # Fix any potential dtype mismatch with any adapters and base model
        base_dtype = next(model.parameters()).dtype

        for _name, param in model.named_parameters():
            if param.requires_grad and param.dtype != base_dtype:
                param.data = param.data.to(base_dtype)

        return model

    def to_peft(self, **kwargs: Any) -> Self:
        """Sets the current model to PeftModel

        NOTE: Pass params to underlying get_peft_model using **kwargs.

        This returns Self to support fluent style:
            `generator = UnslothFastModelGenerator(...).to_peft(...)`

        Raises:
            GeneratorError: If the underlying model is already a
                `~peft.PeftModel`.
        """
        from peft import PeftModel

        if isinstance(self.model, PeftModel):
            raise GeneratorError(
                "Cannot use `to_peft` when underlying model is already a `~peft.PeftModel`."
            )

        # set model to new peft model
        self.model = self._get_peft_model(**kwargs)
        return self

check_dependencies classmethod

check_dependencies(data)

Validate that unsloth dependencies are installed.

Source code in src/fed_rag/generators/unsloth/unsloth_fast_model.py
@model_validator(mode="before")
@classmethod
def check_dependencies(cls, data: Any) -> Any:
    """Validate that unsloth dependencies are installed."""
    # Raises early (before field validation) if unsloth is missing.
    check_unsloth_installed(cls.__name__)
    return data

to_peft

to_peft(**kwargs)

Sets the current model to PeftModel

NOTE: Pass params to underlying get_peft_model using **kwargs.

This returns Self to support fluent style

generator = UnslothFastModelGenerator(...).to_peft(...)

Source code in src/fed_rag/generators/unsloth/unsloth_fast_model.py
def to_peft(self, **kwargs: Any) -> Self:
    """Sets the current model to PeftModel

    NOTE: Pass params to underlying get_peft_model using **kwargs.

    This returns Self to support fluent style:
        `generator = UnslothFastModelGenerator(...).to_peft(...)`

    Raises:
        GeneratorError: If the underlying model is already a
            `~peft.PeftModel`.
    """
    # Imported lazily so `peft` stays an optional dependency.
    from peft import PeftModel

    # Re-wrapping an already-wrapped model is an error rather than a no-op.
    if isinstance(self.model, PeftModel):
        raise GeneratorError(
            "Cannot use `to_peft` when underlying model is already a `~peft.PeftModel`."
        )

    # set model to new peft model
    self.model = self._get_peft_model(**kwargs)
    return self