Python API Reference¶
This section documents the Python API for vector-inference.
Client Interface¶
vec_inf.client.api.VecInfClient¶
Client for interacting with Vector Inference programmatically.
This class provides methods for launching models, checking their status, retrieving metrics, and shutting down models using the Vector Inference infrastructure.
Methods:

| Name | Description |
| --- | --- |
| `list_models` | List all available models |
| `get_model_config` | Get configuration for a specific model |
| `launch_model` | Launch a model on the cluster |
| `batch_launch_models` | Launch multiple models on the cluster |
| `get_status` | Get status of a running model |
| `get_metrics` | Get performance metrics of a running model |
| `shutdown_model` | Shutdown a running model |
| `wait_until_ready` | Wait for a model to become ready |
| `cleanup_logs` | Remove logs from the log directory |
Examples:
>>> from vec_inf.api import VecInfClient
>>> client = VecInfClient()
>>> response = client.launch_model("Meta-Llama-3.1-8B-Instruct")
>>> job_id = response.slurm_job_id
>>> status = client.get_status(job_id)
>>> if status.status == ModelStatus.READY:
... print(f"Model is ready at {status.base_url}")
>>> client.shutdown_model(job_id)
Source code in vec_inf/client/api.py
__init__¶
list_models¶
List all available models.

Returns:

| Type | Description |
| --- | --- |
| `list[ModelInfo]` | List of `ModelInfo` objects containing information about available models, including their configurations and specifications. |
Source code in vec_inf/client/api.py
get_model_config¶
Get the configuration for a specific model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_name` | `str` | Name of the model to get configuration for | required |

Returns:

| Type | Description |
| --- | --- |
| `ModelConfig` | Complete configuration for the specified model |

Raises:

| Type | Description |
| --- | --- |
| `ModelNotFoundError` | If the specified model is not found in the configuration |
Source code in vec_inf/client/api.py
launch_model¶
Launch a model on the cluster.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_name` | `str` | Name of the model to launch | required |
| `options` | `LaunchOptions` | Launch options to override default configuration | `None` |

Returns:

| Type | Description |
| --- | --- |
| `LaunchResponse` | Response containing launch details including: SLURM job ID, model configuration, and launch status |

Raises:

| Type | Description |
| --- | --- |
| `ModelConfigurationError` | If the model configuration is invalid |
| `SlurmJobError` | If there's an error launching the SLURM job |
Source code in vec_inf/client/api.py
batch_launch_models¶
Launch multiple models on the cluster.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_names` | `list[str]` | List of model names to launch | required |

Returns:

| Type | Description |
| --- | --- |
| `BatchLaunchResponse` | Response containing launch details for each model |

Raises:

| Type | Description |
| --- | --- |
| `ModelConfigurationError` | If the model configuration is invalid |
Source code in vec_inf/client/api.py
get_status¶
Get the status of a running model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `slurm_job_id` | `str` | The SLURM job ID to check | required |

Returns:

| Type | Description |
| --- | --- |
| `StatusResponse` | Status information including: model name, server status, job state, base URL (if ready), and error information (if failed) |
Source code in vec_inf/client/api.py
get_metrics¶
Get the performance metrics of a running model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `slurm_job_id` | `str` | The SLURM job ID to get metrics for | required |

Returns:

| Type | Description |
| --- | --- |
| `MetricsResponse` | Response containing: model name, performance metrics or an error message, and the timestamp of collection |
Source code in vec_inf/client/api.py
shutdown_model¶
Shutdown a running model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `slurm_job_id` | `str` | The SLURM job ID to shut down | required |

Returns:

| Type | Description |
| --- | --- |
| `bool` | True if the model was successfully shut down |

Raises:

| Type | Description |
| --- | --- |
| `SlurmJobError` | If there was an error shutting down the model |
Source code in vec_inf/client/api.py
wait_until_ready¶
Wait until a model is ready or fails.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `slurm_job_id` | `str` | The SLURM job ID to wait for | required |
| `timeout_seconds` | `int` | Maximum time to wait in seconds, by default 1800 (30 mins) | `1800` |
| `poll_interval_seconds` | `int` | How often to check status in seconds, by default 10 | `10` |

Returns:

| Type | Description |
| --- | --- |
| `StatusResponse` | Status information when the model becomes ready |

Raises:

| Type | Description |
| --- | --- |
| `SlurmJobError` | If the specified job is not found or there's an error with the job |
| `ServerError` | If the server fails to start within the timeout period |
| `APIError` | If there was an error checking the status |

Notes
The timeout is reset if the model is still in PENDING state after the initial timeout period. This allows for longer queue times in the SLURM scheduler.
Source code in vec_inf/client/api.py
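The timeout-reset behavior described in the notes can be sketched as a simple polling loop. This is an illustrative sketch only, not the actual implementation; the status strings mirror the documented `ModelStatus` states, and `get_status` here is any callable returning the current status:

```python
import time

def wait_until_ready_sketch(get_status, timeout_seconds=1800, poll_interval_seconds=10):
    """Poll until READY, failing on FAILED or on timeout.

    While the job is still PENDING (queued in SLURM), the deadline is pushed
    back so queue time does not count against the server-startup timeout.
    """
    deadline = time.monotonic() + timeout_seconds
    while True:
        status = get_status()
        if status == "READY":
            return status
        if status == "FAILED":
            raise RuntimeError("server failed to start")
        if status == "PENDING":
            # Still queued: reset the timeout window, as described in Notes.
            deadline = time.monotonic() + timeout_seconds
        elif time.monotonic() > deadline:
            raise TimeoutError("server did not become ready in time")
        time.sleep(poll_interval_seconds)
```

The real method returns a `StatusResponse` rather than a string; the sketch only shows the control flow.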
cleanup_logs¶
cleanup_logs(
    log_dir=None,
    model_family=None,
    model_name=None,
    job_id=None,
    before_job_id=None,
    dry_run=False,
)
Remove logs from the log directory.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `log_dir` | `str` or `Path` | Root directory containing log files. Defaults to `~/.vec-inf-logs`. | `None` |
| `model_family` | `str` | Only delete logs for this model family. | `None` |
| `model_name` | `str` | Only delete logs for this model name. | `None` |
| `job_id` | `int` | If provided, only match directories with this exact SLURM job ID. | `None` |
| `before_job_id` | `int` | If provided, only delete logs with job ID less than this value. | `None` |
| `dry_run` | `bool` | If True, return matching files without deleting them. | `False` |

Returns:

| Type | Description |
| --- | --- |
| `list[Path]` | List of deleted (or matched if dry_run) log file paths. |
Source code in vec_inf/client/api.py
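The way the `job_id` and `before_job_id` filters could combine can be sketched as below. This is a hypothetical illustration, not the actual implementation: the `<name>.<job_id>` directory naming is an assumption made only for this example.

```python
from pathlib import Path

def match_log_dirs(dirs, job_id=None, before_job_id=None):
    """Select log directories assumed to be named like '<model>.<job_id>'."""
    matched = []
    for d in dirs:
        try:
            jid = int(d.name.rsplit(".", 1)[-1])  # hypothetical naming scheme
        except ValueError:
            continue  # not a job log directory; skip it
        if job_id is not None and jid != job_id:
            continue  # exact-match filter
        if before_job_id is not None and jid >= before_job_id:
            continue  # keep only logs from jobs older than the cutoff
        matched.append(d)
    return matched
```

With `dry_run=True` the real method returns the matched paths without deleting anything, which is a useful first step before an actual cleanup.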
Model Config¶
vec_inf.client.config.ModelConfig¶
Bases: BaseModel
Pydantic model for validating and managing model deployment configurations.
A configuration class that handles validation and management of model deployment settings, including model specifications, hardware requirements, and runtime parameters.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_name` | `str` | Name of the model, must be alphanumeric with allowed characters: '-', '_', '.' | required |
| `model_family` | `str` | Family/architecture of the model | required |
| `model_variant` | `str` | Specific variant or version of the model family | required |
| `model_type` | `(LLM, VLM, Text_Embedding, Reward_Modeling)` | Type of model architecture | `'LLM'` |
| `gpus_per_node` | `int` | Number of GPUs to use per node (1-MAX_GPUS_PER_NODE) | required |
| `num_nodes` | `int` | Number of nodes to use for deployment (1-MAX_NUM_NODES) | required |
| `cpus_per_task` | `int` | Number of CPU cores per task (1-MAX_CPUS_PER_TASK) | required |
| `mem_per_node` | `str` | Memory allocation per node in GB format (e.g., '32G') | required |
| `vocab_size` | `int` | Size of the model's vocabulary (1-1,000,000) | required |
| `account` | `str` | Charge resources used by this job to the specified account | required |
| `work_dir` | `str` | Working directory for the batch job | required |
| `qos` | `Union[QOS, str]` | Quality of Service tier for job scheduling | required |
| `time` | `str` | Time limit for the job in HH:MM:SS format | required |
| `partition` | `Union[PARTITION, str]` | SLURM partition for job scheduling | required |
| `resource_type` | `Union[RESOURCE_TYPE, str]` | Type of resource to request for the job | required |
| `venv` | `str` | Virtual environment or container system to use | required |
| `log_dir` | `Path` | Directory path for storing logs | required |
| `model_weights_parent_dir` | `Path` | Base directory containing model weights | required |
| `vllm_args` | `dict[str, Any]` | Additional arguments for vLLM engine configuration | required |
Notes
All fields are validated using Pydantic's validation system. The model is configured to be immutable (frozen) and forbids extra fields.
Source code in vec_inf/client/config.py
model_name class-attribute instance-attribute¶
model_variant class-attribute instance-attribute¶
model_type class-attribute instance-attribute¶
gpus_per_node class-attribute instance-attribute¶
num_nodes class-attribute instance-attribute¶
cpus_per_task class-attribute instance-attribute¶
cpus_per_task = Field(
default=int(DEFAULT_ARGS["cpus_per_task"]),
gt=0,
le=MAX_CPUS_PER_TASK,
description="CPUs per task",
)
mem_per_node class-attribute instance-attribute¶
mem_per_node = Field(
default=DEFAULT_ARGS["mem_per_node"],
pattern="^\\d{1,4}G$",
description="Memory per node",
)
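The `pattern` above means `mem_per_node` must be one to four digits followed by the literal unit `G`. A quick check of the same regex:

```python
import re

# Same pattern as the mem_per_node field: 1-4 digits followed by 'G'.
MEM_PATTERN = re.compile(r"^\d{1,4}G$")

assert MEM_PATTERN.match("32G") is not None
assert MEM_PATTERN.match("1024G") is not None
assert MEM_PATTERN.match("32GB") is None  # suffix must be exactly 'G'
assert MEM_PATTERN.match("32") is None    # unit is required
```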
account class-attribute instance-attribute¶
work_dir class-attribute instance-attribute¶
qos class-attribute instance-attribute¶
qos = Field(
default=DEFAULT_ARGS["qos"]
if DEFAULT_ARGS["qos"] != ""
else None,
description="Quality of Service tier",
)
time class-attribute instance-attribute¶
time = Field(
default=DEFAULT_ARGS["time"],
pattern="^\\d{2}:\\d{2}:\\d{2}$",
description="HH:MM:SS time limit",
)
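The `time` pattern requires exactly two digits per component, i.e. `HH:MM:SS`. Checking the same regex:

```python
import re

# Same pattern as the time field: HH:MM:SS with two digits per component.
TIME_PATTERN = re.compile(r"^\d{2}:\d{2}:\d{2}$")

assert TIME_PATTERN.match("08:00:00") is not None
assert TIME_PATTERN.match("8:00:00") is None  # hours must be zero-padded
assert TIME_PATTERN.match("08:00") is None    # seconds are required
```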
partition class-attribute instance-attribute¶
partition = Field(
default=DEFAULT_ARGS["partition"]
if DEFAULT_ARGS["partition"] != ""
else None,
description="GPU partition type",
)
resource_type class-attribute instance-attribute¶
resource_type = Field(
default=DEFAULT_ARGS["resource_type"]
if DEFAULT_ARGS["resource_type"] != ""
else None,
description="Resource type",
)
exclude class-attribute instance-attribute¶
exclude = Field(
default=DEFAULT_ARGS["exclude"],
description="Exclude certain nodes from the resources granted to the job",
)
nodelist class-attribute instance-attribute¶
nodelist = Field(
default=DEFAULT_ARGS["nodelist"],
description="Request a specific list of nodes for deployment",
)
bind class-attribute instance-attribute¶
venv class-attribute instance-attribute¶
log_dir class-attribute instance-attribute¶
model_weights_parent_dir class-attribute instance-attribute¶
model_weights_parent_dir = Field(
default=Path(DEFAULT_ARGS["model_weights_parent_dir"]),
description="Base directory for model weights",
)
vllm_args class-attribute instance-attribute¶
env class-attribute instance-attribute¶
model_config class-attribute instance-attribute¶
model_config = ConfigDict(
extra="forbid",
str_strip_whitespace=True,
validate_default=True,
frozen=True,
)
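`frozen=True` in the `ConfigDict` above means `ModelConfig` instances cannot be mutated after construction. The stdlib analogue of the same idea is a frozen dataclass (this sketch is not the pydantic model itself; `FrozenConfigSketch` is an illustrative stand-in):

```python
from dataclasses import dataclass, FrozenInstanceError

@dataclass(frozen=True)
class FrozenConfigSketch:
    """Illustrative stand-in for an immutable config object."""
    model_name: str
    gpus_per_node: int

cfg = FrozenConfigSketch(model_name="Meta-Llama-3.1-8B-Instruct", gpus_per_node=1)
try:
    cfg.gpus_per_node = 4
except FrozenInstanceError:
    pass  # assignment is rejected; the config stays as constructed
```

Immutability means a launched job's configuration cannot be changed out from under it; to vary settings, construct a new config instead.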
Data Models¶
vec_inf.client.models¶
Data models for Vector Inference API.
This module contains the data model classes used by the Vector Inference API for both request parameters and response objects.
Classes:

| Name | Description |
| --- | --- |
| `ModelStatus` : Enum | Status states of a model |
| `ModelType` : Enum | Types of supported models |
| `LaunchResponse` : dataclass | Response from model launch operation |
| `BatchLaunchResponse` : dataclass | Response from batch model launch operation |
| `StatusResponse` : dataclass | Response from model status check |
| `MetricsResponse` : dataclass | Response from metrics collection |
| `LaunchOptions` : dataclass | Options for model launch |
| `LaunchOptionsDict` : TypedDict | Dictionary representation of launch options |
| `ModelInfo` : dataclass | Information about available models |
ModelStatus¶
Bases: str, Enum
Enum representing the possible status states of a model.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `PENDING` | `str` | Model is waiting for Slurm to allocate resources |
| `LAUNCHING` | `str` | Model is in the process of starting |
| `READY` | `str` | Model is running and ready to serve requests |
| `FAILED` | `str` | Model failed to start or encountered an error |
| `SHUTDOWN` | `str` | Model was intentionally stopped |
| `UNAVAILABLE` | `str` | Model status cannot be determined |
Source code in vec_inf/client/models.py
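Because `ModelStatus` inherits from both `str` and `Enum`, its members compare equal to plain strings, which is what makes checks like `status.status == ModelStatus.READY` interchangeable with string comparisons. A minimal sketch of the same pattern with the documented states:

```python
from enum import Enum

class ModelStatusSketch(str, Enum):
    """Illustrative str-valued enum with the documented status states."""
    PENDING = "PENDING"
    LAUNCHING = "LAUNCHING"
    READY = "READY"
    FAILED = "FAILED"
    SHUTDOWN = "SHUTDOWN"
    UNAVAILABLE = "UNAVAILABLE"

# Members behave as strings, so they can be compared, serialized,
# and used as dict keys interchangeably with plain str values.
assert ModelStatusSketch.READY == "READY"
assert isinstance(ModelStatusSketch.READY, str)
```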
ModelType¶
Bases: str, Enum
Enum representing the possible model types.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `LLM` | `str` | Large Language Model |
| `VLM` | `str` | Vision Language Model |
| `TEXT_EMBEDDING` | `str` | Text Embedding Model |
| `REWARD_MODELING` | `str` | Reward Modeling Model |
Source code in vec_inf/client/models.py
LaunchResponse dataclass¶
Response from launching a model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `slurm_job_id` | `str` | ID of the launched SLURM job | required |
| `model_name` | `str` | Name of the launched model | required |
| `config` | `dict[str, Any]` | Configuration used for the launch | required |
| `raw_output` | `str` | Raw output from the launch command (hidden from repr) | required |
Source code in vec_inf/client/models.py
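"Hidden from repr" for `raw_output` corresponds to `dataclasses.field(repr=False)`: the attribute is stored normally but omitted from the printed representation, keeping potentially long launch output out of logs. An illustrative sketch (not the actual class definition):

```python
from dataclasses import dataclass, field

@dataclass
class LaunchResponseSketch:
    """Illustrative response object; raw_output is excluded from repr."""
    slurm_job_id: str
    model_name: str
    config: dict
    raw_output: str = field(repr=False)

resp = LaunchResponseSketch("12345678", "Meta-Llama-3.1-8B-Instruct", {}, "long sbatch output...")
print(resp)  # repr shows job ID, model name, and config, but not raw_output
```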
BatchLaunchResponse dataclass¶
Response from launching multiple models in batch mode.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `slurm_job_id` | `str` | ID of the launched SLURM job | required |
| `slurm_job_name` | `str` | Name of the launched SLURM job | required |
| `model_names` | `list[str]` | Names of the launched models | required |
| `config` | `dict[str, Any]` | Configuration used for the launch | required |
| `raw_output` | `str` | Raw output from the launch command (hidden from repr) | required |
Source code in vec_inf/client/models.py
StatusResponse dataclass¶
Response from checking a model's status.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_name` | `str` | Name of the model | required |
| `log_dir` | `str` | Path to the SLURM log directory | required |
| `server_status` | `ModelStatus` | Current status of the server | required |
| `job_state` | `Union[str, ModelStatus]` | Current state of the SLURM job | required |
| `raw_output` | `str` | Raw output from status check (hidden from repr) | required |
| `base_url` | `str` | Base URL of the model server if ready | `None` |
| `pending_reason` | `str` | Reason for pending state if applicable | `None` |
| `failed_reason` | `str` | Reason for failure if applicable | `None` |
Source code in vec_inf/client/models.py
MetricsResponse dataclass¶
Response from retrieving model metrics.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_name` | `str` | Name of the model | required |
| `metrics` | `Union[dict[str, float], str]` | Either a dictionary of metrics or an error message | required |
| `timestamp` | `float` | Unix timestamp of when metrics were collected | required |
Source code in vec_inf/client/models.py
LaunchOptions dataclass¶
Options for launching a model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model_family` | `str` | Family/architecture of the model | `None` |
| `model_variant` | `str` | Specific variant/version of the model | `None` |
| `partition` | `str` | SLURM partition to use | `None` |
| `resource_type` | `str` | Type of resource to request for the job | `None` |
| `num_nodes` | `int` | Number of nodes to allocate | `None` |
| `gpus_per_node` | `int` | Number of GPUs per node | `None` |
| `account` | `str` | Account name for job scheduling | `None` |
| `work_dir` | `str` | Working directory for the batch job | `None` |
| `qos` | `str` | Quality of Service level | `None` |
| `time` | `str` | Time limit for the job | `None` |
| `exclude` | `str` | Exclude certain nodes from the resources granted to the job | `None` |
| `node_list` | `str` | Request a specific list of nodes for deployment | `None` |
| `bind` | `str` | Additional binds for the container as a comma-separated list of bind paths | `None` |
| `vocab_size` | `int` | Size of model vocabulary | `None` |
| `data_type` | `str` | Data type for model weights | `None` |
| `venv` | `str` | Virtual environment to use | `None` |
| `log_dir` | `str` | Directory for logs | `None` |
| `model_weights_parent_dir` | `str` | Parent directory containing model weights | `None` |
| `vllm_args` | `str` | Additional arguments for vLLM | `None` |
| `env` | `str` | Environment variables to be set | `None` |
| `config` | `str` | Path to custom model config yaml | `None` |
Source code in vec_inf/client/models.py
ModelInfo dataclass¶
Information about an available model.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `name` | `str` | Name of the model | required |
| `family` | `str` | Family/architecture of the model | required |
| `variant` | `str` | Specific variant/version of the model | required |
| `model_type` | `ModelType` | Type of the model | required |
| `config` | `dict[str, Any]` | Additional configuration parameters | required |