Skip to content

Python API Reference

This section documents the Python API for vector-inference.

Client Interface

vec_inf.client.api.VecInfClient

Client for interacting with Vector Inference programmatically.

This class provides methods for launching models, checking their status, retrieving metrics, and shutting down models using the Vector Inference infrastructure.

Methods:

Name Description
list_models

List all available models

get_model_config

Get configuration for a specific model

launch_model

Launch a model on the cluster

get_status

Get status of a running model

get_metrics

Get performance metrics of a running model

shutdown_model

Shutdown a running model

wait_until_ready

Wait for a model to become ready

Examples:

>>> from vec_inf.client.api import VecInfClient
>>> from vec_inf.client.models import ModelStatus
>>> client = VecInfClient()
>>> response = client.launch_model("Meta-Llama-3.1-8B-Instruct")
>>> job_id = response.slurm_job_id
>>> status = client.get_status(job_id)
>>> if status.server_status == ModelStatus.READY:
...     print(f"Model is ready at {status.base_url}")
>>> client.shutdown_model(job_id)
Source code in vec_inf/client/api.py
class VecInfClient:
    """Client for interacting with Vector Inference programmatically.

    This class provides methods for launching models, checking their status,
    retrieving metrics, and shutting down models using the Vector Inference
    infrastructure.

    Methods
    -------
    list_models()
        List all available models
    get_model_config(model_name)
        Get configuration for a specific model
    launch_model(model_name, options)
        Launch a model on the cluster
    get_status(slurm_job_id, log_dir)
        Get status of a running model
    get_metrics(slurm_job_id, log_dir)
        Get performance metrics of a running model
    shutdown_model(slurm_job_id)
        Shutdown a running model
    wait_until_ready(slurm_job_id, timeout_seconds, poll_interval_seconds, log_dir)
        Wait for a model to become ready

    Examples
    --------
    >>> from vec_inf.client.api import VecInfClient
    >>> from vec_inf.client.models import ModelStatus
    >>> client = VecInfClient()
    >>> response = client.launch_model("Meta-Llama-3.1-8B-Instruct")
    >>> job_id = response.slurm_job_id
    >>> status = client.get_status(job_id)
    >>> if status.server_status == ModelStatus.READY:
    ...     print(f"Model is ready at {status.base_url}")
    >>> client.shutdown_model(job_id)
    """

    def __init__(self) -> None:
        """Initialize the Vector Inference client.

        The client holds no state of its own; every method builds the helper
        object it needs on each call.
        """

    def list_models(self) -> list[ModelInfo]:
        """List all available models.

        Returns
        -------
        list[ModelInfo]
            List of ModelInfo objects containing information about available models,
            including their configurations and specifications.
        """
        model_registry = ModelRegistry()
        return model_registry.get_all_models()

    def get_model_config(self, model_name: str) -> ModelConfig:
        """Get the configuration for a specific model.

        Parameters
        ----------
        model_name : str
            Name of the model to get configuration for

        Returns
        -------
        ModelConfig
            Complete configuration for the specified model

        Raises
        ------
        ModelNotFoundError
            If the specified model is not found in the configuration
        """
        model_registry = ModelRegistry()
        return model_registry.get_single_model_config(model_name)

    def launch_model(
        self, model_name: str, options: Optional[LaunchOptions] = None
    ) -> LaunchResponse:
        """Launch a model on the cluster.

        Parameters
        ----------
        model_name : str
            Name of the model to launch
        options : LaunchOptions, optional
            Launch options to override default configuration. Only attributes
            that are not None are forwarded to the launcher.

        Returns
        -------
        LaunchResponse
            Response containing launch details including:
            - SLURM job ID
            - Model configuration
            - Launch status

        Raises
        ------
        ModelConfigurationError
            If the model configuration is invalid
        SlurmJobError
            If there's an error launching the SLURM job
        """
        # Forward only the options the caller actually set; unset (None)
        # attributes are dropped so they cannot shadow configured defaults.
        options_dict: dict[str, Any] = {}
        if options:
            options_dict = {k: v for k, v in vars(options).items() if v is not None}

        # Create and use the API Launch Helper
        model_launcher = ModelLauncher(model_name, options_dict)
        return model_launcher.launch()

    def get_status(
        self, slurm_job_id: int, log_dir: Optional[str] = None
    ) -> StatusResponse:
        """Get the status of a running model.

        Parameters
        ----------
        slurm_job_id : int
            The SLURM job ID to check
        log_dir : str, optional
            Path to the SLURM log directory. If None, uses default location

        Returns
        -------
        StatusResponse
            Status information including:
            - Model name
            - Server status
            - Job state
            - Base URL (if ready)
            - Error information (if failed)
        """
        model_status_monitor = ModelStatusMonitor(slurm_job_id, log_dir)
        return model_status_monitor.process_model_status()

    def get_metrics(
        self, slurm_job_id: int, log_dir: Optional[str] = None
    ) -> MetricsResponse:
        """Get the performance metrics of a running model.

        Parameters
        ----------
        slurm_job_id : int
            The SLURM job ID to get metrics for
        log_dir : str, optional
            Path to the SLURM log directory. If None, uses default location

        Returns
        -------
        MetricsResponse
            Response containing:
            - Model name
            - Performance metrics or error message
            - Timestamp of collection
        """
        performance_metrics_collector = PerformanceMetricsCollector(
            slurm_job_id, log_dir
        )

        # A metrics_url that does not start with "http" is returned as-is in
        # place of the metrics (presumably an explanatory message from the
        # collector -- confirm against PerformanceMetricsCollector).
        metrics: Union[dict[str, float], str]
        if not performance_metrics_collector.metrics_url.startswith("http"):
            metrics = performance_metrics_collector.metrics_url
        else:
            metrics = performance_metrics_collector.fetch_metrics()

        return MetricsResponse(
            model_name=performance_metrics_collector.status_info.model_name,
            metrics=metrics,
            timestamp=time.time(),
        )

    def shutdown_model(self, slurm_job_id: int) -> bool:
        """Shutdown a running model.

        Parameters
        ----------
        slurm_job_id : int
            The SLURM job ID to shut down

        Returns
        -------
        bool
            True if the model was successfully shutdown

        Raises
        ------
        SlurmJobError
            If there was an error shutting down the model
        """
        shutdown_cmd = f"scancel {slurm_job_id}"
        _, stderr = run_bash_command(shutdown_cmd)
        # Any text on stderr is treated as a failed cancellation.
        if stderr:
            raise SlurmJobError(f"Failed to shutdown model: {stderr}")
        return True

    def wait_until_ready(
        self,
        slurm_job_id: int,
        timeout_seconds: int = 1800,
        poll_interval_seconds: int = 10,
        log_dir: Optional[str] = None,
    ) -> StatusResponse:
        """Wait until a model is ready or fails.

        Parameters
        ----------
        slurm_job_id : int
            The SLURM job ID to wait for
        timeout_seconds : int, optional
            Maximum time to wait in seconds, by default 1800 (30 mins)
        poll_interval_seconds : int, optional
            How often to check status in seconds, by default 10
        log_dir : str, optional
            Path to the SLURM log directory. If None, uses default location

        Returns
        -------
        StatusResponse
            Status information when the model becomes ready

        Raises
        ------
        SlurmJobError
            If the specified job is not found or there's an error with the job
        ServerError
            If the server reports FAILED or SHUTDOWN before becoming ready, or
            if it does not become ready within the timeout period while in a
            state other than PENDING
        APIError
            If there was an error checking the status

        Notes
        -----
        The timeout is reset if the model is still in PENDING state after the
        initial timeout period. This allows for longer queue times in the SLURM
        scheduler. A ``UserWarning`` is emitted on every reset, and the wait is
        therefore unbounded for as long as the job stays PENDING.
        """
        start_time = time.time()

        while True:
            status_info = self.get_status(slurm_job_id, log_dir)

            if status_info.server_status == ModelStatus.READY:
                return status_info

            if status_info.server_status == ModelStatus.FAILED:
                error_message = status_info.failed_reason or "Unknown error"
                raise ServerError(f"Model failed to start: {error_message}")

            if status_info.server_status == ModelStatus.SHUTDOWN:
                raise ServerError("Model was shutdown before it became ready")

            # Check timeout
            if time.time() - start_time > timeout_seconds:
                # Only a job that has left the queue can time out. Raising
                # unconditionally here would make the PENDING reset below
                # dead code and contradict the documented behaviour.
                if status_info.server_status != ModelStatus.PENDING:
                    raise ServerError(
                        f"Timed out waiting for model to become ready after {timeout_seconds} seconds"
                    )
                warnings.warn(
                    f"Model is still pending after {timeout_seconds} seconds, resetting timer...",
                    UserWarning,
                    stacklevel=2,
                )
                start_time = time.time()

            # Wait before checking again
            time.sleep(poll_interval_seconds)

__init__

__init__()

Initialize the Vector Inference client.

Source code in vec_inf/client/api.py
def __init__(self) -> None:
    """Set up the Vector Inference client; there is nothing to initialise."""
    return None

list_models

list_models()

List all available models.

Returns:

Type Description
list[ModelInfo]

List of ModelInfo objects containing information about available models, including their configurations and specifications.

Source code in vec_inf/client/api.py
def list_models(self) -> list[ModelInfo]:
    """Return every model known to the model registry.

    Returns
    -------
    list[ModelInfo]
        One ModelInfo per available model, as produced by
        ``ModelRegistry.get_all_models``.
    """
    # A fresh registry is built for each call; nothing is cached on the client.
    return ModelRegistry().get_all_models()

get_model_config

get_model_config(model_name)

Get the configuration for a specific model.

Parameters:

Name Type Description Default
model_name str

Name of the model to get configuration for

required

Returns:

Type Description
ModelConfig

Complete configuration for the specified model

Raises:

Type Description
ModelNotFoundError

If the specified model is not found in the configuration

Source code in vec_inf/client/api.py
def get_model_config(self, model_name: str) -> ModelConfig:
    """Look up the configuration of a single model.

    Parameters
    ----------
    model_name : str
        Name of the model whose configuration is requested

    Returns
    -------
    ModelConfig
        Configuration returned by the registry for ``model_name``

    Raises
    ------
    ModelNotFoundError
        If the specified model is not found in the configuration
    """
    # A fresh registry is built for each call; nothing is cached on the client.
    return ModelRegistry().get_single_model_config(model_name)

launch_model

launch_model(model_name, options=None)

Launch a model on the cluster.

Parameters:

Name Type Description Default
model_name str

Name of the model to launch

required
options LaunchOptions

Launch options to override default configuration

None

Returns:

Type Description
LaunchResponse

Response containing launch details, including the SLURM job ID, the model configuration, and the launch status.

Raises:

Type Description
ModelConfigurationError

If the model configuration is invalid

SlurmJobError

If there's an error launching the SLURM job

Source code in vec_inf/client/api.py
def launch_model(
    self,
    model_name: str,
    options: Optional[LaunchOptions] = None,
) -> LaunchResponse:
    """Submit a model launch to the cluster.

    Parameters
    ----------
    model_name : str
        Name of the model to launch
    options : LaunchOptions, optional
        Overrides for the default configuration. Attributes left as None
        are not forwarded to the launcher.

    Returns
    -------
    LaunchResponse
        Whatever ``ModelLauncher.launch`` returns for this request

    Raises
    ------
    ModelConfigurationError
        If the model configuration is invalid
    SlurmJobError
        If there's an error launching the SLURM job
    """
    # Keep only the attributes that were explicitly set; with no options the
    # launcher receives an empty override mapping.
    overrides: dict[str, Any] = (
        {name: value for name, value in vars(options).items() if value is not None}
        if options
        else {}
    )
    return ModelLauncher(model_name, overrides).launch()

get_status

get_status(slurm_job_id, log_dir=None)

Get the status of a running model.

Parameters:

Name Type Description Default
slurm_job_id int

The SLURM job ID to check

required
log_dir str

Path to the SLURM log directory. If None, uses default location

None

Returns:

Type Description
StatusResponse

Status information, including the model name, server status, job state, base URL (if ready), and error information (if failed).

Source code in vec_inf/client/api.py
def get_status(
    self,
    slurm_job_id: int,
    log_dir: Optional[str] = None,
) -> StatusResponse:
    """Report the current status of a launched model.

    Parameters
    ----------
    slurm_job_id : int
        The SLURM job ID to check
    log_dir : str, optional
        Path to the SLURM log directory. If None, uses default location

    Returns
    -------
    StatusResponse
        Result of ``ModelStatusMonitor.process_model_status`` for the job
    """
    # The monitor is created per call and discarded afterwards.
    return ModelStatusMonitor(slurm_job_id, log_dir).process_model_status()

get_metrics

get_metrics(slurm_job_id, log_dir=None)

Get the performance metrics of a running model.

Parameters:

Name Type Description Default
slurm_job_id int

The SLURM job ID to get metrics for

required
log_dir str

Path to the SLURM log directory. If None, uses default location

None

Returns:

Type Description
MetricsResponse

Response containing the model name, the performance metrics or an error message, and the timestamp of collection.

Source code in vec_inf/client/api.py
def get_metrics(
    self,
    slurm_job_id: int,
    log_dir: Optional[str] = None,
) -> MetricsResponse:
    """Collect performance metrics for a running model.

    Parameters
    ----------
    slurm_job_id : int
        The SLURM job ID to get metrics for
    log_dir : str, optional
        Path to the SLURM log directory. If None, uses default location

    Returns
    -------
    MetricsResponse
        Model name, the fetched metrics (or the collector's ``metrics_url``
        text when it is not an HTTP URL), and the collection timestamp
    """
    collector = PerformanceMetricsCollector(slurm_job_id, log_dir)

    # Only fetch when the collector exposes an HTTP(S) endpoint; otherwise the
    # metrics_url value itself is passed through in place of the metrics.
    metrics: Union[dict[str, float], str]
    if collector.metrics_url.startswith("http"):
        metrics = collector.fetch_metrics()
    else:
        metrics = collector.metrics_url

    model_name = collector.status_info.model_name
    return MetricsResponse(
        model_name=model_name,
        metrics=metrics,
        timestamp=time.time(),
    )

shutdown_model

shutdown_model(slurm_job_id)

Shutdown a running model.

Parameters:

Name Type Description Default
slurm_job_id int

The SLURM job ID to shut down

required

Returns:

Type Description
bool

True if the model was successfully shutdown

Raises:

Type Description
SlurmJobError

If there was an error shutting down the model

Source code in vec_inf/client/api.py
def shutdown_model(self, slurm_job_id: int) -> bool:
    """Cancel the SLURM job that serves a model.

    Parameters
    ----------
    slurm_job_id : int
        The SLURM job ID to shut down

    Returns
    -------
    bool
        True when ``scancel`` produced no error output

    Raises
    ------
    SlurmJobError
        If ``scancel`` wrote anything to stderr
    """
    _stdout, err_text = run_bash_command(f"scancel {slurm_job_id}")
    # Success is defined purely by an empty stderr.
    if not err_text:
        return True
    raise SlurmJobError(f"Failed to shutdown model: {err_text}")

wait_until_ready

wait_until_ready(
    slurm_job_id,
    timeout_seconds=1800,
    poll_interval_seconds=10,
    log_dir=None,
)

Wait until a model is ready or fails.

Parameters:

Name Type Description Default
slurm_job_id int

The SLURM job ID to wait for

required
timeout_seconds int

Maximum time to wait in seconds, by default 1800 (30 mins)

1800
poll_interval_seconds int

How often to check status in seconds, by default 10

10
log_dir str

Path to the SLURM log directory. If None, uses default location

None

Returns:

Type Description
StatusResponse

Status information when the model becomes ready

Raises:

Type Description
SlurmJobError

If the specified job is not found or there's an error with the job

ServerError

If the server fails to start within the timeout period

APIError

If there was an error checking the status

Notes

The timeout is reset if the model is still in PENDING state after the initial timeout period. This allows for longer queue times in the SLURM scheduler.

Source code in vec_inf/client/api.py
def wait_until_ready(
    self,
    slurm_job_id: int,
    timeout_seconds: int = 1800,
    poll_interval_seconds: int = 10,
    log_dir: Optional[str] = None,
) -> StatusResponse:
    """Wait until a model is ready or fails.

    Parameters
    ----------
    slurm_job_id : int
        The SLURM job ID to wait for
    timeout_seconds : int, optional
        Maximum time to wait in seconds, by default 1800 (30 mins)
    poll_interval_seconds : int, optional
        How often to check status in seconds, by default 10
    log_dir : str, optional
        Path to the SLURM log directory. If None, uses default location

    Returns
    -------
    StatusResponse
        Status information when the model becomes ready

    Raises
    ------
    SlurmJobError
        If the specified job is not found or there's an error with the job
    ServerError
        If the server reports FAILED or SHUTDOWN before becoming ready, or
        if it does not become ready within the timeout period while in a
        state other than PENDING
    APIError
        If there was an error checking the status

    Notes
    -----
    The timeout is reset if the model is still in PENDING state after the
    initial timeout period. This allows for longer queue times in the SLURM
    scheduler. A ``UserWarning`` is emitted on every reset, and the wait is
    therefore unbounded for as long as the job stays PENDING.
    """
    start_time = time.time()

    while True:
        status_info = self.get_status(slurm_job_id, log_dir)

        if status_info.server_status == ModelStatus.READY:
            return status_info

        if status_info.server_status == ModelStatus.FAILED:
            error_message = status_info.failed_reason or "Unknown error"
            raise ServerError(f"Model failed to start: {error_message}")

        if status_info.server_status == ModelStatus.SHUTDOWN:
            raise ServerError("Model was shutdown before it became ready")

        # Check timeout
        if time.time() - start_time > timeout_seconds:
            # Only a job that has left the queue can time out. Raising
            # unconditionally here would make the PENDING reset below dead
            # code and contradict the documented behaviour.
            if status_info.server_status != ModelStatus.PENDING:
                raise ServerError(
                    f"Timed out waiting for model to become ready after {timeout_seconds} seconds"
                )
            warnings.warn(
                f"Model is still pending after {timeout_seconds} seconds, resetting timer...",
                UserWarning,
                stacklevel=2,
            )
            start_time = time.time()

        # Wait before checking again
        time.sleep(poll_interval_seconds)

Data Models

vec_inf.client.models

Data models for Vector Inference API.

This module contains the data model classes used by the Vector Inference API for both request parameters and response objects.

Classes:

Name Description
ModelStatus : Enum

Status states of a model

ModelType : Enum

Types of supported models

LaunchResponse : dataclass

Response from model launch operation

StatusResponse : dataclass

Response from model status check

MetricsResponse : dataclass

Response from metrics collection

LaunchOptions : dataclass

Options for model launch

LaunchOptionsDict : TypedDict

Dictionary representation of launch options

ModelInfo : dataclass

Information about available models

ModelStatus

Bases: str, Enum

Enum representing the possible status states of a model.

Attributes:

Name Type Description
PENDING str

Model is waiting for Slurm to allocate resources

LAUNCHING str

Model is in the process of starting

READY str

Model is running and ready to serve requests

FAILED str

Model failed to start or encountered an error

SHUTDOWN str

Model was intentionally stopped

UNAVAILABLE str

Model status cannot be determined

Source code in vec_inf/client/models.py
class ModelStatus(str, Enum):
    """Lifecycle states a model can be reported in.

    Members are also ``str`` instances, and each member's value equals its
    name, so a member compares equal to the plain string of the same name.

    Attributes
    ----------
    PENDING : str
        Waiting for Slurm to allocate resources
    LAUNCHING : str
        Start-up is in progress
    READY : str
        Running and ready to serve requests
    FAILED : str
        Failed to start or encountered an error
    SHUTDOWN : str
        Intentionally stopped
    UNAVAILABLE : str
        Status cannot be determined
    """

    # Not yet running
    PENDING = "PENDING"
    LAUNCHING = "LAUNCHING"
    # Serving
    READY = "READY"
    # Not serving
    FAILED = "FAILED"
    SHUTDOWN = "SHUTDOWN"
    UNAVAILABLE = "UNAVAILABLE"

ModelType

Bases: str, Enum

Enum representing the possible model types.

Attributes:

Name Type Description
LLM str

Large Language Model

VLM str

Vision Language Model

TEXT_EMBEDDING str

Text Embedding Model

REWARD_MODELING str

Reward Modeling Model

Source code in vec_inf/client/models.py
class ModelType(str, Enum):
    """Categories of model supported by the API.

    Members are also ``str`` instances. Note that the values of the last two
    members are mixed-case and differ from their upper-case names.

    Attributes
    ----------
    LLM : str
        Large Language Model
    VLM : str
        Vision Language Model
    TEXT_EMBEDDING : str
        Text Embedding Model (value ``"Text_Embedding"``)
    REWARD_MODELING : str
        Reward Modeling Model (value ``"Reward_Modeling"``)
    """

    # Value equals name
    LLM = "LLM"
    VLM = "VLM"
    # Value is mixed-case, unlike the member name
    TEXT_EMBEDDING = "Text_Embedding"
    REWARD_MODELING = "Reward_Modeling"

LaunchResponse dataclass

Response from launching a model.

Parameters:

Name Type Description Default
slurm_job_id int

ID of the launched SLURM job

required
model_name str

Name of the launched model

required
config dict[str, Any]

Configuration used for the launch

required
raw_output str

Raw output from the launch command (hidden from repr)

required
Source code in vec_inf/client/models.py
@dataclass
class LaunchResponse:
    """Result of a model launch request.

    Parameters
    ----------
    slurm_job_id : int
        ID of the launched SLURM job
    model_name : str
        Name of the launched model
    config : dict[str, Any]
        Configuration used for the launch
    raw_output : str
        Raw output from the launch command; excluded from ``repr``
    """

    slurm_job_id: int
    model_name: str
    config: dict[str, Any]
    # Kept on the instance but left out of repr().
    raw_output: str = field(repr=False)

StatusResponse dataclass

Response from checking a model's status.

Parameters:

Name Type Description Default
model_name str

Name of the model

required
server_status ModelStatus

Current status of the server

required
job_state Union[str, ModelStatus]

Current state of the SLURM job

required
raw_output str

Raw output from status check (hidden from repr)

required
base_url str

Base URL of the model server if ready

None
pending_reason str

Reason for pending state if applicable

None
failed_reason str

Reason for failure if applicable

None
Source code in vec_inf/client/models.py
@dataclass
class StatusResponse:
    """Result of a model status check.

    Parameters
    ----------
    model_name : str
        Name of the model
    server_status : ModelStatus
        Current status of the server
    job_state : Union[str, ModelStatus]
        Current state of the SLURM job
    raw_output : str
        Raw output from the status check; excluded from ``repr``
    base_url : str, optional
        Base URL of the model server if ready, by default None
    pending_reason : str, optional
        Reason for pending state if applicable, by default None
    failed_reason : str, optional
        Reason for failure if applicable, by default None
    """

    # Required fields
    model_name: str
    server_status: ModelStatus
    job_state: Union[str, ModelStatus]
    # Kept on the instance but left out of repr().
    raw_output: str = field(repr=False)

    # Optional details; each stays None unless supplied
    base_url: Optional[str] = None
    pending_reason: Optional[str] = None
    failed_reason: Optional[str] = None

MetricsResponse dataclass

Response from retrieving model metrics.

Parameters:

Name Type Description Default
model_name str

Name of the model

required
metrics Union[dict[str, float], str]

Either a dictionary of metrics or an error message

required
timestamp float

Unix timestamp of when metrics were collected

required
Source code in vec_inf/client/models.py
@dataclass
class MetricsResponse:
    """Result of a metrics retrieval.

    Parameters
    ----------
    model_name : str
        Name of the model
    metrics : Union[dict[str, float], str]
        A mapping of metric name to value, or a string message when no
        metrics are available
    timestamp : float
        Unix timestamp of when metrics were collected
    """

    model_name: str
    # Either the metrics mapping or a string message.
    metrics: Union[dict[str, float], str]
    timestamp: float

LaunchOptions dataclass

Options for launching a model.

Parameters:

Name Type Description Default
model_family str

Family/architecture of the model

None
model_variant str

Specific variant/version of the model

None
partition str

SLURM partition to use

None
num_nodes int

Number of nodes to allocate

None
gpus_per_node int

Number of GPUs per node

None
account str

Account name for job scheduling

None
qos str

Quality of Service level

None
time str

Time limit for the job

None
vocab_size int

Size of model vocabulary

None
data_type str

Data type for model weights

None
venv str

Virtual environment to use

None
log_dir str

Directory for logs

None
model_weights_parent_dir str

Parent directory containing model weights

None
vllm_args str

Additional arguments for vLLM

None
Source code in vec_inf/client/models.py
@dataclass
class LaunchOptions:
    """Optional overrides for a model launch.

    Every field defaults to None, meaning "not set".

    Parameters
    ----------
    model_family : str, optional
        Family/architecture of the model
    model_variant : str, optional
        Specific variant/version of the model
    partition : str, optional
        SLURM partition to use
    num_nodes : int, optional
        Number of nodes to allocate
    gpus_per_node : int, optional
        Number of GPUs per node
    account : str, optional
        Account name for job scheduling
    qos : str, optional
        Quality of Service level
    time : str, optional
        Time limit for the job
    vocab_size : int, optional
        Size of model vocabulary
    data_type : str, optional
        Data type for model weights
    venv : str, optional
        Virtual environment to use
    log_dir : str, optional
        Directory for logs
    model_weights_parent_dir : str, optional
        Parent directory containing model weights
    vllm_args : str, optional
        Additional arguments for vLLM
    """

    # Model identity
    model_family: Optional[str] = None
    model_variant: Optional[str] = None

    # Scheduling and resources
    partition: Optional[str] = None
    num_nodes: Optional[int] = None
    gpus_per_node: Optional[int] = None
    account: Optional[str] = None
    qos: Optional[str] = None
    time: Optional[str] = None

    # Model and runtime settings
    vocab_size: Optional[int] = None
    data_type: Optional[str] = None
    venv: Optional[str] = None
    log_dir: Optional[str] = None
    model_weights_parent_dir: Optional[str] = None
    vllm_args: Optional[str] = None

ModelInfo dataclass

Information about an available model.

Parameters:

Name Type Description Default
name str

Name of the model

required
family str

Family/architecture of the model

required
variant str

Specific variant/version of the model

required
model_type ModelType

Type of the model

required
config dict[str, Any]

Additional configuration parameters

required
Source code in vec_inf/client/models.py
@dataclass
class ModelInfo:
    """Description of one available model.

    All five fields are required when constructing an instance; none has a
    default value.

    Parameters
    ----------
    name : str
        Name of the model
    family : str
        Family/architecture of the model
    variant : Optional[str]
        Specific variant/version of the model; must be passed, but may be None
    model_type : ModelType
        Type of the model
    config : dict[str, Any]
        Additional configuration parameters
    """

    name: str
    family: str
    # Nullable, but without a default: callers must pass it explicitly.
    variant: Optional[str]
    model_type: ModelType
    config: dict[str, Any]