Skip to content

API Reference

Reference for the unbias_plus package: pipeline, model, schema, FastAPI server, CLI, prompt, parser, and formatters. All public classes and functions are listed below.

Package

unbias_plus

unbias-plus: Bias detection and debiasing using a single LLM.

UnBiasPlus

Main pipeline for bias detection and debiasing.

Loads a fine-tuned LLM and exposes a simple interface for analyzing text for bias. Combines prompt building, inference, JSON parsing, offset computation, and formatting.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the fine-tuned model. Defaults to 'vector-institute/Qwen3-4B-UnBias-Plus-SFT'.

DEFAULT_MODEL
device str | None

Device to run on ('cuda' or 'cpu'). Auto-detected if None.

None
load_in_4bit bool

Load model in 4-bit quantization. Default is False.

False
max_new_tokens int

Maximum tokens to generate. Default is 4096.

4096

Examples:

>>> from unbias_plus import UnBiasPlus
>>> pipe = UnBiasPlus()
>>> result = pipe.analyze("Women are too emotional to lead.")
>>> print(result.binary_label)
biased
Source code in src/unbias_plus/pipeline.py
class UnBiasPlus:
    """End-to-end bias detection and debiasing pipeline.

    Wraps a fine-tuned LLM behind a small interface: prompt
    construction, generation, JSON parsing, character-offset
    computation, and formatting are chained together by
    :meth:`analyze` and its convenience wrappers.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the fine-tuned
        model. Defaults to 'vector-institute/Qwen3-4B-UnBias-Plus-SFT'.
    device : str | None, optional
        Device to run on ('cuda' or 'cpu'). Auto-detected if None.
    load_in_4bit : bool, optional
        Load model in 4-bit quantization. Default is False.
    max_new_tokens : int, optional
        Maximum tokens to generate. Default is 4096.

    Examples
    --------
    >>> from unbias_plus import UnBiasPlus  # doctest: +SKIP
    >>> pipe = UnBiasPlus()  # doctest: +SKIP
    >>> result = pipe.analyze("Women are too emotional to lead.")  # doctest: +SKIP
    >>> print(result.binary_label)  # doctest: +SKIP
    biased

    """

    def __init__(
        self,
        model_name_or_path: str | Path = DEFAULT_MODEL,
        device: str | None = None,
        load_in_4bit: bool = False,
        max_new_tokens: int = 4096,
    ) -> None:
        # All inference is delegated to a single underlying model wrapper.
        self._model = UnBiasModel(
            model_name_or_path=model_name_or_path,
            device=device,
            load_in_4bit=load_in_4bit,
            max_new_tokens=max_new_tokens,
        )

    def analyze(self, text: str) -> BiasResult:
        """Analyze input text for bias.

        Builds the chat prompt, generates with the LLM, parses the
        JSON output, locates each biased segment in the input, and
        returns a result carrying the original text.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        BiasResult
            Structured bias result with start/end offsets on each
            segment and original_text populated.

        Raises
        ------
        ValueError
            If the LLM output cannot be parsed into a valid BiasResult.

        Examples
        --------
        >>> result = pipe.analyze("All politicians are liars.")  # doctest: +SKIP
        >>> result.bias_found  # doctest: +SKIP
        True

        """
        chat = build_messages(text)
        generation = self._model.generate(chat)
        parsed = parse_llm_output(generation)

        # Character-level offsets let a frontend highlight each segment.
        located_segments = compute_offsets(text, parsed.biased_segments)

        updates = {
            "biased_segments": located_segments,
            "original_text": text,
        }
        return parsed.model_copy(update=updates)

    def analyze_to_cli(self, text: str) -> str:
        """Analyze text and return a formatted CLI string.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        str
            Human-readable colored string for terminal display.

        """
        result = self.analyze(text)
        return format_cli(result)

    def analyze_to_dict(self, text: str) -> dict:
        """Analyze text and return result as a plain dictionary.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        dict
            Plain dictionary representation of the result.

        """
        result = self.analyze(text)
        return format_dict(result)

    def analyze_to_json(self, text: str) -> str:
        """Analyze text and return result as a JSON string.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        str
            Pretty-printed JSON string of the result.

        """
        result = self.analyze(text)
        return format_json(result)

analyze

analyze(text)

Analyze input text for bias.

Runs the full pipeline: builds chat messages, runs inference, parses JSON output, computes character offsets for each segment, and attaches the original text to the result.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
BiasResult

Structured bias result with start/end offsets on each segment and original_text populated.

Raises:

Type Description
ValueError

If the LLM output cannot be parsed into a valid BiasResult.

Examples:

>>> result = pipe.analyze("All politicians are liars.")
>>> result.bias_found
True
Source code in src/unbias_plus/pipeline.py
def analyze(self, text: str) -> BiasResult:
    """Analyze input text for bias.

    Builds chat messages, runs model inference, parses the JSON
    output, computes character offsets for every detected segment,
    and attaches the original text to the returned result.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    BiasResult
        Structured bias result with start/end offsets on each
        segment and original_text populated.

    Raises
    ------
    ValueError
        If the LLM output cannot be parsed into a valid BiasResult.

    Examples
    --------
    >>> result = pipe.analyze("All politicians are liars.")  # doctest: +SKIP
    >>> result.bias_found  # doctest: +SKIP
    True

    """
    parsed = parse_llm_output(self._model.generate(build_messages(text)))

    # Attach character offsets so a frontend can highlight segments.
    return parsed.model_copy(
        update={
            "biased_segments": compute_offsets(text, parsed.biased_segments),
            "original_text": text,
        }
    )

analyze_to_cli

analyze_to_cli(text)

Analyze text and return a formatted CLI string.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
str

Human-readable colored string for terminal display.

Source code in src/unbias_plus/pipeline.py
def analyze_to_cli(self, text: str) -> str:
    """Run the analysis and render the result for a terminal.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    str
        Human-readable colored string for terminal display.

    """
    result = self.analyze(text)
    return format_cli(result)

analyze_to_dict

analyze_to_dict(text)

Analyze text and return result as a plain dictionary.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
dict

Plain dictionary representation of the result.

Source code in src/unbias_plus/pipeline.py
def analyze_to_dict(self, text: str) -> dict:
    """Run the analysis and return the result as a dictionary.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    dict
        Plain dictionary representation of the result.

    """
    result = self.analyze(text)
    return format_dict(result)

analyze_to_json

analyze_to_json(text)

Analyze text and return result as a JSON string.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
str

Pretty-printed JSON string of the result.

Source code in src/unbias_plus/pipeline.py
def analyze_to_json(self, text: str) -> str:
    """Run the analysis and return the result serialized as JSON.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    str
        Pretty-printed JSON string of the result.

    """
    result = self.analyze(text)
    return format_json(result)

BiasedSegment

Bases: BaseModel

A single biased segment detected in the text.

Attributes:

Name Type Description
original str

The original biased phrase from the input text.

replacement str

The suggested neutral replacement. Defaults to empty string if the model omits it (e.g. under 4-bit quantization).

severity str

Severity level: 'low', 'medium', or 'high'. Defaults to 'medium' if omitted by the model.

bias_type str

Type of bias (e.g. 'loaded language', 'framing bias').

reasoning str

Explanation of why this segment is considered biased.

start int | None

Character offset start in the original text. Computed by the pipeline after parsing.

end int | None

Character offset end in the original text. Computed by the pipeline after parsing.

Examples:

>>> seg = BiasedSegment(
...     original="Sharia-obsessed fanatics",
...     replacement="extremist groups",
...     severity="high",
...     bias_type="dehumanizing framing",
...     reasoning="Uses inflammatory religious language.",
... )
>>> seg.severity
'high'
Source code in src/unbias_plus/schema.py
class BiasedSegment(BaseModel):
    """One biased span detected in the input text.

    Attributes
    ----------
    original : str
        The original biased phrase from the input text.
    replacement : str
        The suggested neutral replacement. Defaults to empty string
        if the model omits it (e.g. under 4-bit quantization).
    severity : str
        Severity level: 'low', 'medium', or 'high'.
        Defaults to 'medium' if omitted by the model.
    bias_type : str
        Type of bias (e.g. 'loaded language', 'framing bias').
    reasoning : str
        Explanation of why this segment is considered biased.
    start : int | None
        Character offset start in the original text. Computed
        by the pipeline after parsing.
    end : int | None
        Character offset end in the original text. Computed
        by the pipeline after parsing.

    Examples
    --------
    >>> seg = BiasedSegment(
    ...     original="Sharia-obsessed fanatics",
    ...     replacement="extremist groups",
    ...     severity="high",
    ...     bias_type="dehumanizing framing",
    ...     reasoning="Uses inflammatory religious language.",
    ... )
    >>> seg.severity
    'high'

    """

    original: str
    replacement: str = ""  # model may drop this field under 4-bit quantization
    severity: str = "medium"  # falls back to medium when absent
    bias_type: str = ""
    reasoning: str = ""
    start: int | None = None
    end: int | None = None

    @field_validator("severity")
    @classmethod
    def validate_severity(cls, v: str) -> str:
        """Validate and normalise segment severity to low/medium/high."""
        cleaned = v.lower().strip()
        if cleaned in ("low", "medium", "high"):
            return cleaned
        # Unknown labels are tolerated rather than rejected.
        logger.warning(
            "Unexpected segment severity '%s', defaulting to 'medium'", v
        )
        return "medium"

validate_severity classmethod

validate_severity(v)

Validate and normalise segment severity to low/medium/high.

Source code in src/unbias_plus/schema.py
@field_validator("severity")
@classmethod
def validate_severity(cls, v: str) -> str:
    """Validate and normalise segment severity to low/medium/high."""
    level = v.lower().strip()
    if level not in ("low", "medium", "high"):
        # Tolerate unknown labels instead of failing validation.
        logger.warning(
            "Unexpected segment severity '%s', defaulting to 'medium'", v
        )
        level = "medium"
    return level

BiasResult

Bases: BaseModel

Full bias analysis result for an input text.

Attributes:

Name Type Description
binary_label str

Overall label: 'biased' or 'unbiased'.

severity int

Overall severity score: 0 = neutral / no bias; 2 = recurring biased framing; 3 = strong persuasive tone; 4 = inflammatory rhetoric. If the model returns a string ('low', 'medium', 'high'), it is coerced to the nearest integer value.

bias_found bool

Whether any bias was detected in the text.

biased_segments list[BiasedSegment]

List of biased segments found in the text, each with character-level start/end offsets.

unbiased_text str

Full neutral rewrite of the input text.

original_text str | None

The original input text. Set by the pipeline.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="A neutral version of the text.",
... )
>>> result.binary_label
'biased'
Source code in src/unbias_plus/schema.py
class BiasResult(BaseModel):
    """Complete bias analysis for one input text.

    Attributes
    ----------
    binary_label : str
        Overall label: 'biased' or 'unbiased'.
    severity : int
        Overall severity score:
          0 = neutral / no bias
          2 = recurring biased framing
          3 = strong persuasive tone
          4 = inflammatory rhetoric
        If the model returns a string ('low', 'medium', 'high'),
        it is coerced to the nearest integer value.
    bias_found : bool
        Whether any bias was detected in the text.
    biased_segments : list[BiasedSegment]
        List of biased segments found in the text, each with
        character-level start/end offsets.
    unbiased_text : str
        Full neutral rewrite of the input text.
    original_text : str | None
        The original input text. Set by the pipeline.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="A neutral version of the text.",
    ... )
    >>> result.binary_label
    'biased'

    """

    binary_label: str
    severity: int
    bias_found: bool
    biased_segments: list[BiasedSegment]
    unbiased_text: str
    original_text: str | None = None

    @field_validator("binary_label")
    @classmethod
    def validate_binary_label(cls, v: str) -> str:
        """Validate binary_label is 'biased' or 'unbiased'."""
        allowed = {"biased", "unbiased"}
        label = v.lower().strip()
        if label in allowed:
            return label
        raise ValueError(f"binary_label must be one of {allowed}, got '{v}'")

    @field_validator("severity", mode="before")
    @classmethod
    def validate_severity(cls, v: int | str) -> int:
        """Coerce and validate global severity.

        Accepts:
          - int 0, 2, 3, 4  (correct model output)
          - str 'low', 'medium', 'high', 'none'  (model confused scales)
          - any other int   (clamped to nearest valid value)
        """
        if isinstance(v, str):
            cleaned = v.lower().strip()
            coerced = _STR_TO_INT_SEVERITY.get(cleaned)
            if coerced is not None:
                # The model used the per-segment scale by mistake.
                logger.warning(
                    "Global severity returned as string '%s', coerced to %d",
                    v,
                    coerced,
                )
                return coerced
            try:
                v = int(v)  # numeric string such as "3"
            except ValueError:
                logger.warning("Unrecognized severity '%s', defaulting to 2", v)
                return 2

        # Clamp out-of-range integers onto the {0, 2, 3, 4} scale.
        if v <= 0:
            return 0
        if v == 1:
            return 2
        return min(v, 4)

validate_binary_label classmethod

validate_binary_label(v)

Validate binary_label is 'biased' or 'unbiased'.

Source code in src/unbias_plus/schema.py
@field_validator("binary_label")
@classmethod
def validate_binary_label(cls, v: str) -> str:
    """Validate binary_label is 'biased' or 'unbiased'."""
    allowed = {"biased", "unbiased"}
    label = v.lower().strip()
    if label in allowed:
        return label
    raise ValueError(f"binary_label must be one of {allowed}, got '{v}'")

validate_severity classmethod

validate_severity(v)

Coerce and validate global severity.

Accepts: an int of 0, 2, 3, or 4 (correct model output); a str of 'low', 'medium', 'high', or 'none' (model confused the scales); or any other int (clamped to the nearest valid value).

Source code in src/unbias_plus/schema.py
@field_validator("severity", mode="before")
@classmethod
def validate_severity(cls, v: int | str) -> int:
    """Coerce and validate global severity.

    Accepts:
      - int 0, 2, 3, 4  (correct model output)
      - str 'low', 'medium', 'high', 'none'  (model confused scales)
      - any other int   (clamped to nearest valid value)
    """
    if isinstance(v, str):
        cleaned = v.lower().strip()
        if cleaned in _STR_TO_INT_SEVERITY:
            # The model used the per-segment string scale by mistake.
            mapped = _STR_TO_INT_SEVERITY[cleaned]
            logger.warning(
                "Global severity returned as string '%s', coerced to %d",
                v,
                mapped,
            )
            return mapped
        try:
            v = int(v)  # numeric string such as "3"
        except ValueError:
            logger.warning("Unrecognized severity '%s', defaulting to 2", v)
            return 2

    # Clamp out-of-range integers onto the {0, 2, 3, 4} scale.
    if v <= 0:
        return 0
    if v == 1:
        return 2
    return min(v, 4)

serve

serve(
    model_name_or_path=DEFAULT_MODEL,
    host="0.0.0.0",
    port=8000,
    load_in_4bit=False,
    reload=False,
)

Start the unbias-plus API server with the demo UI.

Loads the model and starts a uvicorn server. The demo UI is served at http://localhost:{port}/ and the API is at http://localhost:{port}/analyze.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the model.

DEFAULT_MODEL
host str

Host address to bind to. Default is '0.0.0.0'.

'0.0.0.0'
port int

Port to listen on. Default is 8000.

8000
load_in_4bit bool

Load model in 4-bit quantization. Default is False.

False
reload bool

Enable auto-reload on code changes. Default is False.

False

Examples:

>>> from unbias_plus.api import serve
>>> serve("Qwen/Qwen3-4B", port=8000)
Source code in src/unbias_plus/api.py
def serve(
    model_name_or_path: str | Path = DEFAULT_MODEL,
    host: str = "0.0.0.0",
    port: int = 8000,
    load_in_4bit: bool = False,
    reload: bool = False,
) -> None:
    """Start the unbias-plus API server with the demo UI.

    Stores the model configuration on the FastAPI app state and
    launches a uvicorn server. The demo UI is served at
    http://localhost:{port}/ and the API at
    http://localhost:{port}/analyze.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the model.
    host : str
        Host address to bind to. Default is '0.0.0.0'.
    port : int
        Port to listen on. Default is 8000.
    load_in_4bit : bool
        Load model in 4-bit quantization. Default is False.
    reload : bool
        Enable auto-reload on code changes. Default is False.

    Examples
    --------
    >>> from unbias_plus.api import serve
    >>> serve("Qwen/Qwen3-4B", port=8000)  # doctest: +SKIP

    """
    # The lifespan handler reads these values when the server boots.
    app.state.model_name_or_path = str(model_name_or_path)
    app.state.load_in_4bit = load_in_4bit
    print(f"Starting unbias-plus server at http://localhost:{port}")
    uvicorn.run(app, host=host, port=port, reload=reload)

api

FastAPI server for unbias-plus.

AnalyzeRequest

Bases: BaseModel

Request body for the analyze endpoint.

Attributes:

Name Type Description
text str

The input text to analyze for bias.

Source code in src/unbias_plus/api.py
class AnalyzeRequest(BaseModel):
    """Payload accepted by the analyze endpoint.

    Attributes
    ----------
    text : str
        Text to be analyzed for bias.
    """

    text: str

HealthResponse

Bases: BaseModel

Response body for the health endpoint.

Attributes:

Name Type Description
status str

Server status string.

model str

Currently loaded model name or path.

Source code in src/unbias_plus/api.py
class HealthResponse(BaseModel):
    """Payload returned by the health endpoint.

    Attributes
    ----------
    status : str
        Server status string.
    model : str
        Name or path of the currently loaded model.
    """

    status: str
    model: str

lifespan async

lifespan(app)

Load the model on startup and release on shutdown.

Parameters:

Name Type Description Default
app FastAPI

The FastAPI application instance.

required

Yields:

Type Description
None
Source code in src/unbias_plus/api.py
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Load the model on startup and release it on shutdown.

    Parameters
    ----------
    app : FastAPI
        The FastAPI application instance.

    Yields
    ------
    None

    """
    # serve() stores these on app.state before the server starts;
    # fall back to defaults when run without it (e.g. plain uvicorn).
    app.state.pipe = UnBiasPlus(
        model_name_or_path=getattr(app.state, "model_name_or_path", DEFAULT_MODEL),
        load_in_4bit=getattr(app.state, "load_in_4bit", False),
    )
    yield
    # Drop the pipeline reference so the model can be reclaimed.
    app.state.pipe = None

index

index()

Serve the demo UI.

Returns:

Type Description
str

HTML content of the demo page.

Raises:

Type Description
HTTPException

404 if the demo directory is not found.

Source code in src/unbias_plus/api.py
@app.get("/", response_class=HTMLResponse)
def index() -> str:
    """Serve the demo UI.

    Returns
    -------
    str
        HTML content of the demo page.

    Raises
    ------
    HTTPException
        404 if the demo directory is not found.

    """
    page = DEMO_DIR / "templates" / "index.html"
    if page.exists():
        return page.read_text()
    raise HTTPException(status_code=404, detail="Demo UI not found.")

health

health(request)

Check if the server and model are ready.

Returns:

Type Description
HealthResponse

Server status and loaded model name.

Source code in src/unbias_plus/api.py
@app.get("/health", response_model=HealthResponse)
def health(request: Request) -> HealthResponse:
    """Check if the server and model are ready.

    Returns
    -------
    HealthResponse
        Server status and loaded model name.

    """
    pipe = getattr(request.app.state, "pipe", None)
    # NOTE(review): reaches into the pipeline's private _model wrapper
    # to report the model name — consider a public accessor.
    model_name = str(pipe._model.model_name_or_path) if pipe else "not loaded"
    return HealthResponse(status="ok", model=model_name)

analyze

analyze(request, body)

Analyze input text for bias.

Parameters:

Name Type Description Default
request Request

FastAPI request (for app state).

required
body AnalyzeRequest

Request body containing the text to analyze.

required

Returns:

Type Description
BiasResult

Structured bias analysis result with character offsets.

Raises:

Type Description
HTTPException

500 if the model is not loaded or inference fails.

HTTPException

422 if the model output cannot be parsed.

Source code in src/unbias_plus/api.py
@app.post("/analyze", response_model=BiasResult)
def analyze(request: Request, body: AnalyzeRequest) -> BiasResult:
    """Analyze input text for bias.

    Parameters
    ----------
    request : Request
        FastAPI request (for app state).
    body : AnalyzeRequest
        Request body containing the text to analyze.

    Returns
    -------
    BiasResult
        Structured bias analysis result with character offsets.

    Raises
    ------
    HTTPException
        500 if the model is not loaded or inference fails.
    HTTPException
        422 if the model output cannot be parsed.

    """
    pipe = getattr(request.app.state, "pipe", None)
    if pipe is None:
        raise HTTPException(status_code=500, detail="Model not loaded.")
    try:
        result = pipe.analyze(body.text)
    except ValueError as e:
        # The LLM output could not be parsed into a valid BiasResult.
        raise HTTPException(status_code=422, detail=str(e)) from e
    return cast(BiasResult, result)

serve

serve(
    model_name_or_path=DEFAULT_MODEL,
    host="0.0.0.0",
    port=8000,
    load_in_4bit=False,
    reload=False,
)

Start the unbias-plus API server with the demo UI.

Loads the model and starts a uvicorn server. The demo UI is served at http://localhost:{port}/ and the API is at http://localhost:{port}/analyze.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the model.

DEFAULT_MODEL
host str

Host address to bind to. Default is '0.0.0.0'.

'0.0.0.0'
port int

Port to listen on. Default is 8000.

8000
load_in_4bit bool

Load model in 4-bit quantization. Default is False.

False
reload bool

Enable auto-reload on code changes. Default is False.

False

Examples:

>>> from unbias_plus.api import serve
>>> serve("Qwen/Qwen3-4B", port=8000)
Source code in src/unbias_plus/api.py
def serve(
    model_name_or_path: str | Path = DEFAULT_MODEL,
    host: str = "0.0.0.0",
    port: int = 8000,
    load_in_4bit: bool = False,
    reload: bool = False,
) -> None:
    """Launch the unbias-plus API server and demo UI.

    Records the model configuration on the FastAPI app state, then
    runs uvicorn. The demo UI lives at http://localhost:{port}/ and
    the API at http://localhost:{port}/analyze.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the model.
    host : str
        Host address to bind to. Default is '0.0.0.0'.
    port : int
        Port to listen on. Default is 8000.
    load_in_4bit : bool
        Load model in 4-bit quantization. Default is False.
    reload : bool
        Enable auto-reload on code changes. Default is False.

    Examples
    --------
    >>> from unbias_plus.api import serve
    >>> serve("Qwen/Qwen3-4B", port=8000)  # doctest: +SKIP

    """
    # Consumed by the lifespan handler when the app boots.
    app.state.model_name_or_path = str(model_name_or_path)
    app.state.load_in_4bit = load_in_4bit
    print(f"Starting unbias-plus server at http://localhost:{port}")
    uvicorn.run(app, host=host, port=port, reload=reload)

cli

CLI entry point for unbias-plus.

parse_args

parse_args()

Parse CLI arguments.

Returns:

Type Description
Namespace

Parsed arguments.

Source code in src/unbias_plus/cli.py
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments.

    Returns
    -------
    argparse.Namespace
        Parsed arguments.

    """
    parser = argparse.ArgumentParser(
        prog="unbias-plus",
        description="Detect and debias text using a single LLM.",
    )

    # At most one input mode; main() reports an error if none is given.
    input_group = parser.add_mutually_exclusive_group()
    input_group.add_argument(
        "--text",
        type=str,
        help="Text string to analyze.",
    )
    input_group.add_argument(
        "--file",
        type=str,
        help="Path to a .txt file to analyze.",
    )
    input_group.add_argument(
        "--serve",
        action="store_true",
        default=False,
        help="Start the FastAPI server.",
    )

    parser.add_argument(
        "--model",
        type=str,
        default=DEFAULT_MODEL,
        help=f"HuggingFace model ID or local path. Default: {DEFAULT_MODEL}",
    )
    parser.add_argument(
        "--load-in-4bit",
        action="store_true",
        default=False,
        help="Load model in 4-bit quantization to reduce VRAM usage.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        default=False,
        help="Output result as raw JSON instead of formatted CLI display.",
    )
    # Derive the help text from the default so the two cannot drift
    # (the help previously claimed 1024 while the default was 2048).
    max_new_tokens_default = 2048
    parser.add_argument(
        "--max-new-tokens",
        type=int,
        default=max_new_tokens_default,
        help=f"Maximum number of tokens to generate. Default: {max_new_tokens_default}",
    )
    parser.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="Host for the API server. Default: 0.0.0.0",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port for the API server. Default: 8000",
    )

    return parser.parse_args()

main

main()

Run the unbias-plus CLI.

Examples:

$ unbias-plus --text "Women are too emotional to lead."

$ unbias-plus --file article.txt --json

$ unbias-plus --serve --model path/to/model --port 8000

$ unbias-plus --serve --load-in-4bit

Source code in src/unbias_plus/cli.py
def main() -> None:
    """Run the unbias-plus CLI.

    Examples
    --------
    $ unbias-plus --text "Women are too emotional to lead."
    $ unbias-plus --file article.txt --json
    $ unbias-plus --serve --model path/to/model --port 8000
    $ unbias-plus --serve --load-in-4bit

    """
    args = parse_args()

    # Server mode: hand off to the API server and exit.
    if args.serve:
        serve(
            model_name_or_path=args.model,
            host=args.host,
            port=args.port,
            load_in_4bit=args.load_in_4bit,
        )
        return

    if not (args.text or args.file):
        print(
            "Error: one of --text, --file, or --serve is required.",
            file=sys.stderr,
        )
        sys.exit(1)

    text = args.text
    if args.file:
        try:
            with open(args.file) as f:
                text = f.read()
        except FileNotFoundError:
            print(f"Error: file '{args.file}' not found.", file=sys.stderr)
            sys.exit(1)

    pipe = UnBiasPlus(
        model_name_or_path=args.model,
        load_in_4bit=args.load_in_4bit,
        max_new_tokens=args.max_new_tokens,
    )

    output = pipe.analyze_to_json(text) if args.json else pipe.analyze_to_cli(text)
    print(output)

formatter

Formatters for displaying BiasResult output.

format_cli

format_cli(result)

Format a BiasResult for CLI terminal display.

Produces a human-readable, colored terminal output showing the bias label, severity, each biased segment with its replacement and reasoning, and the full unbiased rewrite.

Parameters:

Name Type Description Default
result BiasResult

The bias analysis result to format.

required

Returns:

Type Description
str

A human-readable colored string for terminal output.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="Neutral.",
... )
>>> output = format_cli(result)
>>> isinstance(output, str)
True
Source code in src/unbias_plus/formatter.py
def format_cli(result: BiasResult) -> str:
    """Render a BiasResult as a human-readable terminal report.

    The report contains a header with the segment count (when bias
    was found), one colored entry per biased segment (original
    phrase, suggested replacement, bias type, reasoning), and the
    full neutral rewrite at the bottom.

    Parameters
    ----------
    result : BiasResult
        The bias analysis result to render.

    Returns
    -------
    str
        A colored, multi-line string suitable for terminal display.

    Examples
    --------
    >>> res = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="Neutral.",
    ... )
    >>> isinstance(format_cli(res), str)
    True

    """
    rule = "=" * 60
    out = [rule]
    if result.bias_found:
        out.append(f"Segments found: {len(result.biased_segments)}")
    if not result.biased_segments:
        out.append("\nNo biased segments detected.")
    out.append(rule)

    if result.biased_segments:
        out.append("\nBIASED SEGMENTS:")
        # The reset code is constant; only the per-segment color varies.
        reset = _SEVERITY_COLORS["reset"]
        for idx, segment in enumerate(result.biased_segments, 1):
            color = _SEVERITY_COLORS.get(segment.severity, "")
            out.append(f"\n  [{idx}] {color}{segment.severity.upper()}{reset}")
            out.append(f'  Original  : "{segment.original}"')
            out.append(f'  Replace   : "{segment.replacement}"')
            out.append(f"  Bias type : {segment.bias_type}")
            out.append(f"  Reasoning : {segment.reasoning}")

    out.append("\n" + "-" * 60)
    out.append("NEUTRAL REWRITE:")
    out.append(result.unbiased_text)
    out.append(rule)

    return "\n".join(out)

format_dict

format_dict(result)

Convert a BiasResult to a plain Python dictionary.

Parameters:

Name Type Description Default
result BiasResult

The bias analysis result to convert.

required

Returns:

Type Description
dict

Plain dictionary representation of the result.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="Neutral.",
... )
>>> d = format_dict(result)
>>> isinstance(d, dict)
True
Source code in src/unbias_plus/formatter.py
def format_dict(result: BiasResult) -> dict:
    """Return the BiasResult as a plain Python dictionary.

    Thin wrapper over pydantic's ``model_dump()`` so callers don't
    need to know the result is a pydantic model.

    Parameters
    ----------
    result : BiasResult
        The bias analysis result to convert.

    Returns
    -------
    dict
        Plain dictionary representation of the result.

    Examples
    --------
    >>> res = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="Neutral.",
    ... )
    >>> isinstance(format_dict(res), dict)
    True

    """
    return result.model_dump()

format_json

format_json(result)

Convert a BiasResult to a formatted JSON string.

Parameters:

Name Type Description Default
result BiasResult

The bias analysis result to convert.

required

Returns:

Type Description
str

Pretty-printed JSON string representation of the result.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="Neutral.",
... )
>>> json_str = format_json(result)
>>> isinstance(json_str, str)
True
Source code in src/unbias_plus/formatter.py
def format_json(result: BiasResult) -> str:
    """Convert a BiasResult to a formatted JSON string.

    Non-ASCII characters (names, quoted text in other languages) are
    emitted as-is rather than as ``\\uXXXX`` escapes, so the output
    stays human-readable; the JSON is semantically identical either way.

    Parameters
    ----------
    result : BiasResult
        The bias analysis result to convert.

    Returns
    -------
    str
        Pretty-printed JSON string representation of the result.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="Neutral.",
    ... )
    >>> json_str = format_json(result)
    >>> isinstance(json_str, str)
    True

    """
    # ensure_ascii=False keeps non-ASCII text readable in the output;
    # any JSON parser accepts both forms.
    return json.dumps(result.model_dump(), indent=2, ensure_ascii=False)

model

LLM model loader and inference for unbias-plus.

UnBiasModel

Loads and runs the fine-tuned bias detection LLM.

Wraps a HuggingFace causal LM with a simple generate() interface. Compatible with any HuggingFace causal LM — thinking mode is opt-in for Qwen3 models only.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the model. Defaults to 'vector-institute/Qwen3-8B-UnBias-Plus-SFT'.

DEFAULT_MODEL
device str | None

Device to run on ('cuda' or 'cpu'). Auto-detects if not provided.

None
load_in_4bit bool

Load model in 4-bit quantization via bitsandbytes. Reduces VRAM to ~3GB (4B) or ~5GB (8B). Default is False.

False
max_new_tokens int

Maximum number of new tokens to generate. Default 2048.

2048
enable_thinking bool

Enable Qwen3 chain-of-thought thinking mode. Only supported by Qwen3 models — do not set for other models. Default is False.

False
thinking_budget int

Maximum tokens allocated to the thinking block when enable_thinking=True. Default is 512.

512

Examples:

>>> model = UnBiasModel()
>>> raw = model.generate([{"role": "user", "content": "..."}])
>>> isinstance(raw, str)
True
Source code in src/unbias_plus/model.py
class UnBiasModel:
    """Loads and runs the fine-tuned bias detection LLM.

    Wraps a HuggingFace causal LM with a simple generate()
    interface. Compatible with any HuggingFace causal LM —
    thinking mode is opt-in for Qwen3 models only.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the model.
        Defaults to 'vector-institute/Qwen3-8B-UnBias-Plus-SFT'.
    device : str | None, optional
        Device to run on ('cuda' or 'cpu').
        Auto-detects if not provided.
    load_in_4bit : bool, optional
        Load model in 4-bit quantization via bitsandbytes.
        Reduces VRAM to ~3GB (4B) or ~5GB (8B). Default is False.
    max_new_tokens : int, optional
        Maximum number of new tokens to generate. Default 2048.
    enable_thinking : bool, optional
        Enable Qwen3 chain-of-thought thinking mode. Only supported
        by Qwen3 models — do not set for other models. Default is False.
    thinking_budget : int, optional
        Maximum tokens allocated to the thinking block when
        enable_thinking=True. Default is 512.

    Examples
    --------
    >>> model = UnBiasModel()  # doctest: +SKIP
    >>> raw = model.generate([{"role": "user", "content": "..."}])  # doctest: +SKIP
    >>> isinstance(raw, str)  # doctest: +SKIP
    True
    """

    def __init__(
        self,
        model_name_or_path: str | Path = DEFAULT_MODEL,
        device: str | None = None,
        load_in_4bit: bool = False,
        max_new_tokens: int = 2048,
        enable_thinking: bool = False,
        thinking_budget: int = 512,
    ) -> None:
        self.model_name_or_path = str(model_name_or_path)
        self.max_new_tokens = max_new_tokens
        self.enable_thinking = enable_thinking
        self.thinking_budget = thinking_budget
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        # --- Tokenizer ---
        # Some checkpoints ship without a pad token; reuse EOS so padded
        # batches can still be built.
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        # Left padding keeps the prompt right-aligned, which is the
        # standard setup for decoder-only generation.
        self.tokenizer.padding_side = "left"

        # --- Quantization config ---
        # Default model always loads in 4bit to keep VRAM manageable (~5GB).
        # For any custom model, load_in_4bit remains opt-in.
        effective_load_in_4bit = load_in_4bit or (
            self.model_name_or_path == DEFAULT_MODEL
        )
        quantization_config = None
        if effective_load_in_4bit:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
            )

        # --- Model ---
        # device_map={'': device_index} ensures the full model lands on one
        # specific GPU, avoiding multi-GPU conflicts from device_map="auto".
        device_index = 0 if self.device == "cuda" else self.device
        # NOTE(review): 'dtype' is the newer transformers spelling of
        # 'torch_dtype' — confirm the pinned transformers version accepts it.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name_or_path,
            dtype=torch.bfloat16,
            device_map={"": device_index},
            quantization_config=quantization_config,
        )
        # eval() disables dropout etc. for deterministic inference.
        self.model.eval()

    def generate(self, messages: list[dict]) -> str:
        """Run inference on a list of chat messages and return the raw output.

        Uses greedy decoding (do_sample=False) for deterministic, consistent
        JSON output across runs. Works with any HuggingFace causal LM.

        Parameters
        ----------
        messages : list[dict]
            List of {"role": ..., "content": ...} dicts.
            Should include system prompt and user message.

        Returns
        -------
        str
            Raw string output from the model with the input prompt stripped.
            Special tokens are removed for clean downstream parsing.

        Examples
        --------
        >>> model = UnBiasModel()  # doctest: +SKIP
        >>> msgs = [{"role": "user", "content": "..."}]  # doctest: +SKIP
        >>> output = model.generate(msgs)  # doctest: +SKIP
        >>> isinstance(output, str)  # doctest: +SKIP
        True
        """
        # Build template kwargs as a literal — only pass thinking args when
        # explicitly enabled so the code works with any HF model, not just Qwen3.
        # enable_thinking is always passed explicitly (even as False) so
        # Qwen3's jinja template doesn't fall back to its own default of True.
        template_kwargs: dict = {
            "tokenize": True,
            "add_generation_prompt": True,
            "return_tensors": "pt",
            "return_dict": True,
            "truncation": True,
            "max_length": MAX_SEQ_LENGTH,
            # Always set enable_thinking explicitly for Qwen3 models so the
            # jinja template respects our setting rather than its own default.
            # For non-Qwen3 models this key is simply ignored by the tokenizer.
            "enable_thinking": self.enable_thinking,
        }
        if self.enable_thinking:
            # thinking_budget is only consumed by chat templates that read
            # it (Qwen3); it is never passed when thinking is disabled.
            template_kwargs["thinking_budget"] = self.thinking_budget

        tokenized = self.tokenizer.apply_chat_template(messages, **template_kwargs)

        input_ids = tokenized["input_ids"].to(self.device)
        attention_mask = tokenized["attention_mask"].to(self.device)

        with torch.no_grad():
            output_ids = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=self.max_new_tokens,
                do_sample=False,  # greedy decoding — deterministic output
                temperature=None,  # must be None when do_sample=False
                top_p=None,  # must be None when do_sample=False
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        # Decode only the new tokens — strip the input prompt.
        # skip_special_tokens=True removes <|im_start|>, <|endoftext|> etc.
        # so the parser receives clean text without special token artifacts
        # that could corrupt JSON extraction.
        # Batch size is 1 here (a single chat), hence output_ids[0].
        new_tokens = output_ids[0][input_ids.shape[-1] :]
        return str(self.tokenizer.decode(new_tokens, skip_special_tokens=True))
generate
generate(messages)

Run inference on a list of chat messages and return the raw output.

Uses greedy decoding (do_sample=False) for deterministic, consistent JSON output across runs. Works with any HuggingFace causal LM.

Parameters:

Name Type Description Default
messages list[dict]

List of {"role": ..., "content": ...} dicts. Should include system prompt and user message.

required

Returns:

Type Description
str

Raw string output from the model with the input prompt stripped. Special tokens are removed for clean downstream parsing.

Examples:

>>> model = UnBiasModel()
>>> msgs = [{"role": "user", "content": "..."}]
>>> output = model.generate(msgs)
>>> isinstance(output, str)
True
Source code in src/unbias_plus/model.py
def generate(self, messages: list[dict]) -> str:
    """Run inference on a list of chat messages and return the raw output.

    Uses greedy decoding (do_sample=False) for deterministic, consistent
    JSON output across runs. Works with any HuggingFace causal LM.

    Parameters
    ----------
    messages : list[dict]
        List of {"role": ..., "content": ...} dicts.
        Should include system prompt and user message.

    Returns
    -------
    str
        Raw string output from the model with the input prompt stripped.
        Special tokens are removed for clean downstream parsing.

    Examples
    --------
    >>> model = UnBiasModel()  # doctest: +SKIP
    >>> msgs = [{"role": "user", "content": "..."}]  # doctest: +SKIP
    >>> output = model.generate(msgs)  # doctest: +SKIP
    >>> isinstance(output, str)  # doctest: +SKIP
    True
    """
    # Build template kwargs as a literal — only pass thinking args when
    # explicitly enabled so the code works with any HF model, not just Qwen3.
    # enable_thinking is always passed explicitly (even as False) so
    # Qwen3's jinja template doesn't fall back to its own default of True.
    template_kwargs: dict = {
        "tokenize": True,
        "add_generation_prompt": True,
        "return_tensors": "pt",
        "return_dict": True,
        "truncation": True,
        "max_length": MAX_SEQ_LENGTH,
        # Always set enable_thinking explicitly for Qwen3 models so the
        # jinja template respects our setting rather than its own default.
        # For non-Qwen3 models this key is simply ignored by the tokenizer.
        "enable_thinking": self.enable_thinking,
    }
    if self.enable_thinking:
        # thinking_budget is only consumed by chat templates that read
        # it (Qwen3); it is never passed when thinking is disabled.
        template_kwargs["thinking_budget"] = self.thinking_budget

    tokenized = self.tokenizer.apply_chat_template(messages, **template_kwargs)

    input_ids = tokenized["input_ids"].to(self.device)
    attention_mask = tokenized["attention_mask"].to(self.device)

    with torch.no_grad():
        output_ids = self.model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=self.max_new_tokens,
            do_sample=False,  # greedy decoding — deterministic output
            temperature=None,  # must be None when do_sample=False
            top_p=None,  # must be None when do_sample=False
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
        )

    # Decode only the new tokens — strip the input prompt.
    # skip_special_tokens=True removes <|im_start|>, <|endoftext|> etc.
    # so the parser receives clean text without special token artifacts
    # that could corrupt JSON extraction.
    # Batch size is 1 here (a single chat), hence output_ids[0].
    new_tokens = output_ids[0][input_ids.shape[-1] :]
    return str(self.tokenizer.decode(new_tokens, skip_special_tokens=True))

parser

Parser for LLM JSON output into BiasResult objects.

parse_llm_output

parse_llm_output(raw_output)

Parse raw LLM output string into a BiasResult object.

Handles Qwen3 thinking blocks (`<think>...</think>`) as well as plain JSON output from any model. Attempts multiple strategies to extract and parse a JSON object from the raw LLM output, then validates it against the BiasResult schema.

Strategies (in order): 1. Extract JSON by brace counting — stops at closing } so any hallucinated text after the JSON block is ignored entirely. 2. Strip thinking block from extracted text if present. 3. Direct JSON parse of extracted block. 4. Fix truncated strings (LLM cut off mid-output). 5. Fix missing commas between JSON items. 6. Aggressive key-by-key extraction as last resort.

Parameters:

Name Type Description Default
raw_output str

Raw string returned by the LLM, may include a thinking block, extra text, markdown code fences, or be truncated/malformed.

required

Returns:

Type Description
BiasResult

Validated and structured bias analysis result.

Raises:

Type Description
ValueError

If the output cannot be parsed as valid JSON or does not match the expected BiasResult schema after all repair attempts.

Examples:

>>> raw = '''
... {
...   "binary_label": "biased",
...   "severity": 3,
...   "bias_found": true,
...   "biased_segments": [],
...   "unbiased_text": "A neutral version."
... }
... '''
>>> result = parse_llm_output(raw)
>>> result.binary_label
'biased'
Source code in src/unbias_plus/parser.py
def parse_llm_output(raw_output: str) -> BiasResult:
    """Turn a raw LLM completion into a validated BiasResult.

    Works with plain-JSON output from any model as well as Qwen3
    output that carries a <think>...</think> block. The JSON object
    is first isolated by brace counting (so trailing hallucinated
    text is discarded), then parsed with a ladder of increasingly
    aggressive repair strategies before schema validation.

    Repair ladder (in order):
    1. Extract the root JSON object by brace counting — stops at the
       closing } so hallucinated text after the JSON is ignored.
    2. Strip a thinking block from the extracted text if present.
    3. Direct JSON parse of the extracted block.
    4. Repair truncated strings (LLM cut off mid-output).
    5. Repair missing commas between JSON items.
    6. Aggressive key-by-key extraction as last resort.

    Parameters
    ----------
    raw_output : str
        Raw string returned by the LLM; may include a thinking block,
        extra text, markdown code fences, or truncated/malformed JSON.

    Returns
    -------
    BiasResult
        Validated and structured bias analysis result.

    Raises
    ------
    ValueError
        If no repair strategy yields valid JSON, or the parsed data
        does not satisfy the BiasResult schema.

    Examples
    --------
    >>> raw = '''
    ... {
    ...   "binary_label": "biased",
    ...   "severity": 3,
    ...   "bias_found": true,
    ...   "biased_segments": [],
    ...   "unbiased_text": "A neutral version."
    ... }
    ... '''
    >>> parse_llm_output(raw).binary_label
    'biased'

    """
    # Isolate the root JSON object first: brace counting stops at its
    # closing }, so anything the model hallucinated afterwards (e.g.
    # "assistant\n<think>\nuser\n...") never reaches the parser.
    extracted = _extract_json(raw_output)

    # Strip a thinking block from the extracted text. No-op for models
    # that never emit one; running it after extraction means a <think>
    # tag hallucinated *after* the JSON cannot empty the string.
    text = _strip_thinking_block(extracted)

    # Parse attempts, cheapest first; each repair is computed lazily,
    # only when every earlier attempt has failed.
    attempts = (
        lambda: text,
        lambda: _fix_truncated_json(text),
        lambda: _fix_missing_commas(text),
        lambda: _fix_missing_commas(_fix_truncated_json(text)),
    )
    data = None
    for attempt in attempts:
        data = _try_parse(attempt())
        if data is not None:
            break

    # Last resort: pull fields out one by one with regexes.
    if data is None:
        data = _extract_fields_by_regex(text)

    if data is None:
        raise ValueError(
            f"LLM output could not be parsed as JSON after all repair attempts.\n"
            f"Raw output:\n{raw_output}"
        )

    # Drop repeated segments (same original phrase) before validation.
    if "biased_segments" in data and isinstance(data["biased_segments"], list):
        data["biased_segments"] = _deduplicate_segments(data["biased_segments"])

    try:
        return BiasResult(**data)
    except Exception as e:
        raise ValueError(
            f"LLM JSON does not match expected schema.\nData: {data}\nError: {e}"
        ) from e

pipeline

Main pipeline for unbias-plus.

UnBiasPlus

Main pipeline for bias detection and debiasing.

Loads a fine-tuned LLM and exposes a simple interface for analyzing text for bias. Combines prompt building, inference, JSON parsing, offset computation, and formatting.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the fine-tuned model. Defaults to 'vector-institute/Qwen3-4B-UnBias-Plus-SFT'.

DEFAULT_MODEL
device str | None

Device to run on ('cuda' or 'cpu'). Auto-detected if None.

None
load_in_4bit bool

Load model in 4-bit quantization. Default is False.

False
max_new_tokens int

Maximum tokens to generate. Default is 4096.

4096

Examples:

>>> from unbias_plus import UnBiasPlus
>>> pipe = UnBiasPlus()
>>> result = pipe.analyze("Women are too emotional to lead.")
>>> print(result.binary_label)
biased
Source code in src/unbias_plus/pipeline.py
class UnBiasPlus:
    """End-to-end bias detection and debiasing pipeline.

    Wraps a fine-tuned LLM behind a small API: prompt construction,
    inference, JSON parsing, character-offset computation, and output
    formatting are all handled internally.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the fine-tuned
        model. Defaults to 'vector-institute/Qwen3-4B-UnBias-Plus-SFT'.
    device : str | None, optional
        Device to run on ('cuda' or 'cpu'). Auto-detected if None.
    load_in_4bit : bool, optional
        Load model in 4-bit quantization. Default is False.
    max_new_tokens : int, optional
        Maximum tokens to generate. Default is 4096.

    Examples
    --------
    >>> from unbias_plus import UnBiasPlus  # doctest: +SKIP
    >>> pipe = UnBiasPlus()  # doctest: +SKIP
    >>> result = pipe.analyze("Women are too emotional to lead.")  # doctest: +SKIP
    >>> print(result.binary_label)  # doctest: +SKIP
    biased

    """

    def __init__(
        self,
        model_name_or_path: str | Path = DEFAULT_MODEL,
        device: str | None = None,
        load_in_4bit: bool = False,
        max_new_tokens: int = 4096,
    ) -> None:
        # All heavy lifting (tokenizer/model loading, generation) lives
        # in UnBiasModel; this class only orchestrates.
        self._model = UnBiasModel(
            model_name_or_path=model_name_or_path,
            device=device,
            load_in_4bit=load_in_4bit,
            max_new_tokens=max_new_tokens,
        )

    def analyze(self, text: str) -> BiasResult:
        """Run the full bias analysis pipeline on ``text``.

        Builds the chat prompt, runs inference, parses the model's
        JSON output, computes character offsets for every detected
        segment, and attaches the original text to the result.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        BiasResult
            Structured result with start/end offsets on each segment
            and ``original_text`` populated.

        Raises
        ------
        ValueError
            If the LLM output cannot be parsed into a valid BiasResult.

        Examples
        --------
        >>> result = pipe.analyze("All politicians are liars.")  # doctest: +SKIP
        >>> result.bias_found  # doctest: +SKIP
        True

        """
        raw = self._model.generate(build_messages(text))
        parsed = parse_llm_output(raw)

        # Character-level offsets let a frontend highlight each
        # segment in place.
        located = compute_offsets(text, parsed.biased_segments)

        return parsed.model_copy(
            update={
                "biased_segments": located,
                "original_text": text,
            }
        )

    def analyze_to_cli(self, text: str) -> str:
        """Analyze ``text`` and return a colored CLI report.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        str
            Human-readable colored string for terminal display.

        """
        return format_cli(self.analyze(text))

    def analyze_to_dict(self, text: str) -> dict:
        """Analyze ``text`` and return the result as a plain dict.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        dict
            Plain dictionary representation of the result.

        """
        return format_dict(self.analyze(text))

    def analyze_to_json(self, text: str) -> str:
        """Analyze ``text`` and return the result as a JSON string.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        str
            Pretty-printed JSON string of the result.

        """
        return format_json(self.analyze(text))
analyze
analyze(text)

Analyze input text for bias.

Runs the full pipeline: builds chat messages, runs inference, parses JSON output, computes character offsets for each segment, and attaches the original text to the result.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
BiasResult

Structured bias result with start/end offsets on each segment and original_text populated.

Raises:

Type Description
ValueError

If the LLM output cannot be parsed into a valid BiasResult.

Examples:

>>> result = pipe.analyze("All politicians are liars.")
>>> result.bias_found
True
Source code in src/unbias_plus/pipeline.py
def analyze(self, text: str) -> BiasResult:
    """Analyze input text for bias.

    Runs the full pipeline: builds chat messages, runs inference,
    parses JSON output, computes character offsets for each
    segment, and attaches the original text to the result.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    BiasResult
        Structured bias result with start/end offsets on each
        segment and original_text populated.

    Raises
    ------
    ValueError
        If the LLM output cannot be parsed into a valid BiasResult.

    Examples
    --------
    >>> result = pipe.analyze("All politicians are liars.")  # doctest: +SKIP
    >>> result.bias_found  # doctest: +SKIP
    True

    """
    messages = build_messages(text)
    raw_output = self._model.generate(messages)
    result = parse_llm_output(raw_output)

    # Compute character-level offsets for frontend highlighting
    segments_with_offsets = compute_offsets(text, result.biased_segments)

    # model_copy returns an updated copy; the parsed result itself
    # is not mutated.
    return result.model_copy(
        update={
            "biased_segments": segments_with_offsets,
            "original_text": text,
        }
    )
analyze_to_cli
analyze_to_cli(text)

Analyze text and return a formatted CLI string.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
str

Human-readable colored string for terminal display.

Source code in src/unbias_plus/pipeline.py
def analyze_to_cli(self, text: str) -> str:
    """Analyze ``text`` and render the result for the terminal.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    str
        Human-readable colored string for terminal display.

    """
    result = self.analyze(text)
    return format_cli(result)
analyze_to_dict
analyze_to_dict(text)

Analyze text and return result as a plain dictionary.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
dict

Plain dictionary representation of the result.

Source code in src/unbias_plus/pipeline.py
def analyze_to_dict(self, text: str) -> dict:
    """Analyze ``text`` and return the result as a plain dict.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    dict
        Plain dictionary representation of the result.

    """
    result = self.analyze(text)
    return format_dict(result)
analyze_to_json
analyze_to_json(text)

Analyze text and return result as a JSON string.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
str

Pretty-printed JSON string of the result.

Source code in src/unbias_plus/pipeline.py
def analyze_to_json(self, text: str) -> str:
    """Analyze ``text`` and return the result as a JSON string.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    str
        Pretty-printed JSON string of the result.

    """
    result = self.analyze(text)
    return format_json(result)

prompt

Prompt templates for the unbias-plus LLM.

build_messages

build_messages(text)

Build the chat messages list for the LLM given input text.

Formats the system prompt and user text into the messages format required by the model's chat template.

Parameters:

Name Type Description Default
text str

The input text to analyze for bias.

required

Returns:

Type Description
list[dict]

List of {"role": ..., "content": ...} dicts ready for tokenizer.apply_chat_template().

Examples:

>>> messages = build_messages("Women are too emotional to lead.")
>>> messages[0]["role"]
'system'
>>> messages[1]["role"]
'user'
>>> "Women are too emotional to lead." in messages[1]["content"]
True
Source code in src/unbias_plus/prompt.py
def build_messages(text: str) -> list[dict]:
    """Assemble the chat messages for a bias-analysis request.

    Pairs the fixed system prompt with a user message that embeds
    the text to analyze, in the shape expected by
    ``tokenizer.apply_chat_template()``.

    Parameters
    ----------
    text : str
        The input text to analyze for bias.

    Returns
    -------
    list[dict]
        Two {"role": ..., "content": ...} dicts: the system prompt
        followed by the user request.

    Examples
    --------
    >>> messages = build_messages("Women are too emotional to lead.")
    >>> messages[0]["role"]
    'system'
    >>> messages[1]["role"]
    'user'
    >>> "Women are too emotional to lead." in messages[1]["content"]
    True
    """
    request = (
        "Analyze the following text for bias and return the result "
        "in the required JSON format.\n\n"
        f"TEXT:\n{text}"
    )
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": request},
    ]

schema

Data schemas for unbias-plus output.

BiasedSegment

Bases: BaseModel

A single biased segment detected in the text.

Attributes:

Name Type Description
original str

The original biased phrase from the input text.

replacement str

The suggested neutral replacement. Defaults to empty string if the model omits it (e.g. under 4-bit quantization).

severity str

Severity level: 'low', 'medium', or 'high'. Defaults to 'medium' if omitted by the model.

bias_type str

Type of bias (e.g. 'loaded language', 'framing bias').

reasoning str

Explanation of why this segment is considered biased.

start int | None

Character offset start in the original text. Computed by the pipeline after parsing.

end int | None

Character offset end in the original text. Computed by the pipeline after parsing.

Examples:

>>> seg = BiasedSegment(
...     original="Sharia-obsessed fanatics",
...     replacement="extremist groups",
...     severity="high",
...     bias_type="dehumanizing framing",
...     reasoning="Uses inflammatory religious language.",
... )
>>> seg.severity
'high'
Source code in src/unbias_plus/schema.py
class BiasedSegment(BaseModel):
    """One biased span detected in the input text.

    Attributes
    ----------
    original : str
        The biased phrase exactly as it appears in the input.
    replacement : str
        Suggested neutral replacement; empty when the model omits it
        (e.g. under 4-bit quantization).
    severity : str
        Segment severity: 'low', 'medium', or 'high'. Falls back to
        'medium' when the model omits or garbles it.
    bias_type : str
        Kind of bias (e.g. 'loaded language', 'framing bias').
    reasoning : str
        Why this span is considered biased.
    start : int | None
        Character offset of the span's start, filled in by the
        pipeline after parsing.
    end : int | None
        Character offset of the span's end, filled in by the
        pipeline after parsing.

    Examples
    --------
    >>> seg = BiasedSegment(
    ...     original="Sharia-obsessed fanatics",
    ...     replacement="extremist groups",
    ...     severity="high",
    ...     bias_type="dehumanizing framing",
    ...     reasoning="Uses inflammatory religious language.",
    ... )
    >>> seg.severity
    'high'

    """

    original: str
    replacement: str = ""  # model may omit this, e.g. under 4-bit quantization
    severity: str = "medium"  # falls back to 'medium' when omitted
    bias_type: str = ""
    reasoning: str = ""
    start: int | None = None
    end: int | None = None

    @field_validator("severity")
    @classmethod
    def validate_severity(cls, v: str) -> str:
        """Normalise segment severity; unknown values become 'medium'."""
        normalized = v.lower().strip()
        if normalized in {"low", "medium", "high"}:
            return normalized
        logger.warning(
            "Unexpected segment severity '%s', defaulting to 'medium'", v
        )
        return "medium"
validate_severity classmethod
validate_severity(v)

Validate and normalise segment severity to low/medium/high.

Source code in src/unbias_plus/schema.py
@field_validator("severity")
@classmethod
def validate_severity(cls, v: str) -> str:
    """Normalise segment severity; unknown values become 'medium'."""
    normalized = v.lower().strip()
    if normalized in {"low", "medium", "high"}:
        return normalized
    logger.warning(
        "Unexpected segment severity '%s', defaulting to 'medium'", v
    )
    return "medium"

BiasResult

Bases: BaseModel

Full bias analysis result for an input text.

Attributes:

Name Type Description
binary_label str

Overall label: 'biased' or 'unbiased'.

severity int

Overall severity score: 0 = neutral / no bias; 2 = recurring biased framing; 3 = strong persuasive tone; 4 = inflammatory rhetoric. If the model returns a string ('low', 'medium', 'high'), it is coerced to the nearest integer value.

bias_found bool

Whether any bias was detected in the text.

biased_segments list[BiasedSegment]

List of biased segments found in the text, each with character-level start/end offsets.

unbiased_text str

Full neutral rewrite of the input text.

original_text str | None

The original input text. Set by the pipeline.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="A neutral version of the text.",
... )
>>> result.binary_label
'biased'
Source code in src/unbias_plus/schema.py
class BiasResult(BaseModel):
    """Complete bias analysis for one input text.

    Attributes
    ----------
    binary_label : str
        Overall label: 'biased' or 'unbiased'.
    severity : int
        Overall severity score on the 0/2/3/4 scale:
        0 = neutral / no bias, 2 = recurring biased framing,
        3 = strong persuasive tone, 4 = inflammatory rhetoric.
        String values ('low', 'medium', 'high') from the model are
        coerced to the closest integer.
    bias_found : bool
        Whether any bias was detected in the text.
    biased_segments : list[BiasedSegment]
        Biased segments found in the text, each carrying
        character-level start/end offsets.
    unbiased_text : str
        Full neutral rewrite of the input text.
    original_text : str | None
        The original input text, attached by the pipeline.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="A neutral version of the text.",
    ... )
    >>> result.binary_label
    'biased'

    """

    binary_label: str
    severity: int
    bias_found: bool
    biased_segments: list[BiasedSegment]
    unbiased_text: str
    original_text: str | None = None

    @field_validator("binary_label")
    @classmethod
    def validate_binary_label(cls, v: str) -> str:
        """Normalise and check binary_label ('biased'/'unbiased')."""
        allowed = {"biased", "unbiased"}
        label = v.lower().strip()
        if label in allowed:
            return label
        raise ValueError(f"binary_label must be one of {allowed}, got '{v}'")

    @field_validator("severity", mode="before")
    @classmethod
    def validate_severity(cls, v: int | str) -> int:
        """Coerce global severity onto the 0/2/3/4 integer scale.

        Accepts:
          - int 0, 2, 3, 4  (correct model output)
          - str 'low', 'medium', 'high', 'none'  (model confused scales)
          - any other int   (clamped to nearest valid value)
        """
        if isinstance(v, str):
            normalized = v.lower().strip()
            # Model sometimes emits the per-segment string scale here.
            coerced = _STR_TO_INT_SEVERITY.get(normalized)
            if coerced is not None:
                logger.warning(
                    "Global severity returned as string '%s', coerced to %d",
                    v,
                    coerced,
                )
                return coerced
            # Numeric string such as "3" — parse it and fall through.
            try:
                v = int(v)
            except ValueError:
                logger.warning("Unrecognized severity '%s', defaulting to 2", v)
                return 2

        # Clamp stray integers onto the valid scale.
        if v <= 0:
            return 0
        if v == 1:
            return 2
        if v > 4:
            return 4
        return v  # already one of {2, 3, 4}
validate_binary_label classmethod
validate_binary_label(v)

Validate binary_label is 'biased' or 'unbiased'.

Source code in src/unbias_plus/schema.py
@field_validator("binary_label")
@classmethod
def validate_binary_label(cls, v: str) -> str:
    """Normalise and check binary_label ('biased'/'unbiased')."""
    allowed = {"biased", "unbiased"}
    label = v.lower().strip()
    if label in allowed:
        return label
    raise ValueError(f"binary_label must be one of {allowed}, got '{v}'")
validate_severity classmethod
validate_severity(v)

Coerce and validate global severity.

Accepts: int 0, 2, 3, or 4 (correct model output); str 'low', 'medium', 'high', or 'none' (model confused the two severity scales); any other int (clamped to the nearest valid value).

Source code in src/unbias_plus/schema.py
@field_validator("severity", mode="before")
@classmethod
def validate_severity(cls, v: int | str) -> int:
    """Coerce global severity onto the 0/2/3/4 integer scale.

    Accepts:
      - int 0, 2, 3, 4  (correct model output)
      - str 'low', 'medium', 'high', 'none'  (model confused scales)
      - any other int   (clamped to nearest valid value)
    """
    if isinstance(v, str):
        normalized = v.lower().strip()
        # Model sometimes emits the per-segment string scale here.
        coerced = _STR_TO_INT_SEVERITY.get(normalized)
        if coerced is not None:
            logger.warning(
                "Global severity returned as string '%s', coerced to %d",
                v,
                coerced,
            )
            return coerced
        # Numeric string such as "3" — parse it and fall through.
        try:
            v = int(v)
        except ValueError:
            logger.warning("Unrecognized severity '%s', defaulting to 2", v)
            return 2

    # Clamp stray integers onto the valid scale.
    if v <= 0:
        return 0
    if v == 1:
        return 2
    if v > 4:
        return 4
    return v  # already one of {2, 3, 4}

compute_offsets

compute_offsets(original_text, segments)

Compute character start/end offsets for each biased segment.

Walks the original text with a cursor so that duplicate phrases are matched in order of appearance, not just the first occurrence.

Parameters:

Name Type Description Default
original_text str

The original input text.

required
segments list[BiasedSegment]

Parsed segments from the LLM (without offsets).

required

Returns:

Type Description
list[BiasedSegment]

Segments with start/end fields populated, sorted by start offset.

Source code in src/unbias_plus/schema.py
def compute_offsets(
    original_text: str, segments: list[BiasedSegment]
) -> list[BiasedSegment]:
    """Compute character start/end offsets for each biased segment.

    A cursor advances through the original text so that repeated
    phrases are matched in order of appearance rather than always
    hitting the first occurrence.

    Parameters
    ----------
    original_text : str
        The original input text.
    segments : list[BiasedSegment]
        Parsed segments from the LLM (without offsets).

    Returns
    -------
    list[BiasedSegment]
        Segments with start/end fields populated, sorted by start offset.

    """
    results: list[BiasedSegment] = []
    search_from = 0

    for segment in segments:
        needle = segment.original
        if not needle:
            continue

        pos = _find_case_insensitive(original_text, needle, search_from)
        if pos == -1:
            # Not found past the cursor — retry from the beginning.
            pos = _find_case_insensitive(original_text, needle, 0)

        if pos == -1:
            logger.warning("Could not find segment in text: '%s'", needle)
            results.append(segment)
            continue

        stop = pos + len(needle)
        results.append(segment.model_copy(update={"start": pos, "end": stop}))
        search_from = stop

    return sorted(results, key=lambda s: 0 if s.start is None else s.start)

Pipeline

unbias_plus.pipeline

Main pipeline for unbias-plus.

UnBiasPlus

Main pipeline for bias detection and debiasing.

Loads a fine-tuned LLM and exposes a simple interface for analyzing text for bias. Combines prompt building, inference, JSON parsing, offset computation, and formatting.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the fine-tuned model. Defaults to 'vector-institute/Qwen3-4B-UnBias-Plus-SFT'.

DEFAULT_MODEL
device str | None

Device to run on ('cuda' or 'cpu'). Auto-detected if None.

None
load_in_4bit bool

Load model in 4-bit quantization. Default is False.

False
max_new_tokens int

Maximum tokens to generate. Default is 4096.

4096

Examples:

>>> from unbias_plus import UnBiasPlus
>>> pipe = UnBiasPlus()
>>> result = pipe.analyze("Women are too emotional to lead.")
>>> print(result.binary_label)
biased
Source code in src/unbias_plus/pipeline.py
class UnBiasPlus:
    """High-level pipeline for bias detection and debiasing.

    Wraps a fine-tuned LLM behind a single analyze() entry point:
    prompt construction, inference, JSON parsing, character-offset
    computation, and output formatting.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the fine-tuned
        model. Defaults to 'vector-institute/Qwen3-4B-UnBias-Plus-SFT'.
    device : str | None, optional
        Device to run on ('cuda' or 'cpu'). Auto-detected if None.
    load_in_4bit : bool, optional
        Load model in 4-bit quantization. Default is False.
    max_new_tokens : int, optional
        Maximum tokens to generate. Default is 4096.

    Examples
    --------
    >>> from unbias_plus import UnBiasPlus  # doctest: +SKIP
    >>> pipe = UnBiasPlus()  # doctest: +SKIP
    >>> result = pipe.analyze("Women are too emotional to lead.")  # doctest: +SKIP
    >>> print(result.binary_label)  # doctest: +SKIP
    biased

    """

    def __init__(
        self,
        model_name_or_path: str | Path = DEFAULT_MODEL,
        device: str | None = None,
        load_in_4bit: bool = False,
        max_new_tokens: int = 4096,
    ) -> None:
        # All heavy lifting (tokenizer/model loading) lives in UnBiasModel.
        self._model = UnBiasModel(
            model_name_or_path=model_name_or_path,
            device=device,
            load_in_4bit=load_in_4bit,
            max_new_tokens=max_new_tokens,
        )

    def analyze(self, text: str) -> BiasResult:
        """Analyze input text for bias.

        Builds the chat messages, runs inference, parses the JSON
        output, computes character offsets for every segment, and
        attaches the original text to the result.

        Parameters
        ----------
        text : str
            The input text to analyze.

        Returns
        -------
        BiasResult
            Structured result with start/end offsets on each segment
            and original_text populated.

        Raises
        ------
        ValueError
            If the LLM output cannot be parsed into a valid BiasResult.

        Examples
        --------
        >>> result = pipe.analyze("All politicians are liars.")  # doctest: +SKIP
        >>> result.bias_found  # doctest: +SKIP
        True

        """
        raw = self._model.generate(build_messages(text))
        parsed = parse_llm_output(raw)
        # Character-level offsets let a frontend highlight each segment.
        enriched = compute_offsets(text, parsed.biased_segments)
        return parsed.model_copy(
            update={"biased_segments": enriched, "original_text": text}
        )

    def analyze_to_cli(self, text: str) -> str:
        """Analyze *text* and return a colored, human-readable CLI string."""
        return format_cli(self.analyze(text))

    def analyze_to_dict(self, text: str) -> dict:
        """Analyze *text* and return the result as a plain dictionary."""
        return format_dict(self.analyze(text))

    def analyze_to_json(self, text: str) -> str:
        """Analyze *text* and return the result as a pretty-printed JSON string."""
        return format_json(self.analyze(text))

analyze

analyze(text)

Analyze input text for bias.

Runs the full pipeline: builds chat messages, runs inference, parses JSON output, computes character offsets for each segment, and attaches the original text to the result.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
BiasResult

Structured bias result with start/end offsets on each segment and original_text populated.

Raises:

Type Description
ValueError

If the LLM output cannot be parsed into a valid BiasResult.

Examples:

>>> result = pipe.analyze("All politicians are liars.")
>>> result.bias_found
True
Source code in src/unbias_plus/pipeline.py
def analyze(self, text: str) -> BiasResult:
    """Analyze input text for bias.

    Builds the chat messages, runs inference, parses the JSON
    output, computes character offsets for every segment, and
    attaches the original text to the result.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    BiasResult
        Structured result with start/end offsets on each segment
        and original_text populated.

    Raises
    ------
    ValueError
        If the LLM output cannot be parsed into a valid BiasResult.

    Examples
    --------
    >>> result = pipe.analyze("All politicians are liars.")  # doctest: +SKIP
    >>> result.bias_found  # doctest: +SKIP
    True

    """
    raw = self._model.generate(build_messages(text))
    parsed = parse_llm_output(raw)
    # Character-level offsets let a frontend highlight each segment.
    enriched = compute_offsets(text, parsed.biased_segments)
    return parsed.model_copy(
        update={"biased_segments": enriched, "original_text": text}
    )

analyze_to_cli

analyze_to_cli(text)

Analyze text and return a formatted CLI string.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
str

Human-readable colored string for terminal display.

Source code in src/unbias_plus/pipeline.py
def analyze_to_cli(self, text: str) -> str:
    """Analyze *text* and return a colored, human-readable CLI string.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    str
        Human-readable colored string for terminal display.

    """
    return format_cli(self.analyze(text))

analyze_to_dict

analyze_to_dict(text)

Analyze text and return result as a plain dictionary.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
dict

Plain dictionary representation of the result.

Source code in src/unbias_plus/pipeline.py
def analyze_to_dict(self, text: str) -> dict:
    """Analyze *text* and return the result as a plain dictionary.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    dict
        Plain dictionary representation of the result.

    """
    return format_dict(self.analyze(text))

analyze_to_json

analyze_to_json(text)

Analyze text and return result as a JSON string.

Parameters:

Name Type Description Default
text str

The input text to analyze.

required

Returns:

Type Description
str

Pretty-printed JSON string of the result.

Source code in src/unbias_plus/pipeline.py
def analyze_to_json(self, text: str) -> str:
    """Analyze *text* and return the result as a pretty-printed JSON string.

    Parameters
    ----------
    text : str
        The input text to analyze.

    Returns
    -------
    str
        Pretty-printed JSON string of the result.

    """
    return format_json(self.analyze(text))

Model

unbias_plus.model

LLM model loader and inference for unbias-plus.

UnBiasModel

Loads and runs the fine-tuned bias detection LLM.

Wraps a HuggingFace causal LM with a simple generate() interface. Compatible with any HuggingFace causal LM — thinking mode is opt-in for Qwen3 models only.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the model. Defaults to 'vector-institute/Qwen3-8B-UnBias-Plus-SFT'.

DEFAULT_MODEL
device str | None

Device to run on ('cuda' or 'cpu'). Auto-detects if not provided.

None
load_in_4bit bool

Load model in 4-bit quantization via bitsandbytes. Reduces VRAM to ~3GB (4B) or ~5GB (8B). Default is False.

False
max_new_tokens int

Maximum number of new tokens to generate. Default 2048.

2048
enable_thinking bool

Enable Qwen3 chain-of-thought thinking mode. Only supported by Qwen3 models — do not set for other models. Default is False.

False
thinking_budget int

Maximum tokens allocated to the thinking block when enable_thinking=True. Default is 512.

512

Examples:

>>> model = UnBiasModel()
>>> raw = model.generate([{"role": "user", "content": "..."}])
>>> isinstance(raw, str)
True
Source code in src/unbias_plus/model.py
class UnBiasModel:
    """Loads and runs the fine-tuned bias detection LLM.

    Wraps a HuggingFace causal LM with a simple generate()
    interface. Compatible with any HuggingFace causal LM —
    thinking mode is opt-in for Qwen3 models only.

    NOTE(review): the pipeline-level docs cite
    'vector-institute/Qwen3-4B-UnBias-Plus-SFT' as the default model
    while this docstring cites the 8B checkpoint — confirm which
    checkpoint DEFAULT_MODEL actually points at.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the model.
        Defaults to 'vector-institute/Qwen3-8B-UnBias-Plus-SFT'.
    device : str | None, optional
        Device to run on ('cuda' or 'cpu').
        Auto-detects if not provided.
    load_in_4bit : bool, optional
        Load model in 4-bit quantization via bitsandbytes.
        Reduces VRAM to ~3GB (4B) or ~5GB (8B). Default is False.
        Note: the default model is always loaded in 4-bit regardless
        of this flag (see the quantization block in __init__).
    max_new_tokens : int, optional
        Maximum number of new tokens to generate. Default 2048.
    enable_thinking : bool, optional
        Enable Qwen3 chain-of-thought thinking mode. Only supported
        by Qwen3 models — do not set for other models. Default is False.
    thinking_budget : int, optional
        Maximum tokens allocated to the thinking block when
        enable_thinking=True. Default is 512.

    Examples
    --------
    >>> model = UnBiasModel()  # doctest: +SKIP
    >>> raw = model.generate([{"role": "user", "content": "..."}])  # doctest: +SKIP
    >>> isinstance(raw, str)  # doctest: +SKIP
    True
    """

    def __init__(
        self,
        model_name_or_path: str | Path = DEFAULT_MODEL,
        device: str | None = None,
        load_in_4bit: bool = False,
        max_new_tokens: int = 2048,
        enable_thinking: bool = False,
        thinking_budget: int = 512,
    ) -> None:
        self.model_name_or_path = str(model_name_or_path)
        self.max_new_tokens = max_new_tokens
        self.enable_thinking = enable_thinking
        self.thinking_budget = thinking_budget
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        # --- Tokenizer ---
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path)
        if self.tokenizer.pad_token is None:
            # Some checkpoints ship without a pad token; reuse EOS so that
            # padded batches can still be built.
            self.tokenizer.pad_token = self.tokenizer.eos_token
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        # Left padding keeps the prompt adjacent to the generated tokens,
        # which is the usual requirement for decoder-only generation.
        self.tokenizer.padding_side = "left"

        # --- Quantization config ---
        # Default model always loads in 4bit to keep VRAM manageable (~5GB).
        # For any custom model, load_in_4bit remains opt-in.
        effective_load_in_4bit = load_in_4bit or (
            self.model_name_or_path == DEFAULT_MODEL
        )
        quantization_config = None
        if effective_load_in_4bit:
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_compute_dtype=torch.bfloat16,
            )

        # --- Model ---
        # device_map={'': device_index} ensures the full model lands on one
        # specific GPU, avoiding multi-GPU conflicts from device_map="auto".
        device_index = 0 if self.device == "cuda" else self.device
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name_or_path,
            dtype=torch.bfloat16,
            device_map={"": device_index},
            quantization_config=quantization_config,
        )
        # Inference mode: disables training-only behavior such as dropout.
        self.model.eval()

    def generate(self, messages: list[dict]) -> str:
        """Run inference on a list of chat messages and return the raw output.

        Uses greedy decoding (do_sample=False) for deterministic, consistent
        JSON output across runs. Works with any HuggingFace causal LM.

        Parameters
        ----------
        messages : list[dict]
            List of {"role": ..., "content": ...} dicts.
            Should include system prompt and user message.

        Returns
        -------
        str
            Raw string output from the model with the input prompt stripped.
            Special tokens are removed for clean downstream parsing.

        Examples
        --------
        >>> model = UnBiasModel()  # doctest: +SKIP
        >>> msgs = [{"role": "user", "content": "..."}]  # doctest: +SKIP
        >>> output = model.generate(msgs)  # doctest: +SKIP
        >>> isinstance(output, str)  # doctest: +SKIP
        True
        """
        # Build template kwargs as a literal — only pass thinking args when
        # explicitly enabled so the code works with any HF model, not just Qwen3.
        # enable_thinking is always passed explicitly (even as False) so
        # Qwen3's jinja template doesn't fall back to its own default of True.
        template_kwargs: dict = {
            "tokenize": True,
            "add_generation_prompt": True,
            "return_tensors": "pt",
            "return_dict": True,
            "truncation": True,
            "max_length": MAX_SEQ_LENGTH,
            # Always set enable_thinking explicitly for Qwen3 models so the
            # jinja template respects our setting rather than its own default.
            # For non-Qwen3 models this key is simply ignored by the tokenizer.
            "enable_thinking": self.enable_thinking,
        }
        if self.enable_thinking:
            template_kwargs["thinking_budget"] = self.thinking_budget

        tokenized = self.tokenizer.apply_chat_template(messages, **template_kwargs)

        # Move the tokenized prompt onto the model's device before generating.
        input_ids = tokenized["input_ids"].to(self.device)
        attention_mask = tokenized["attention_mask"].to(self.device)

        # Inference only — no gradient bookkeeping needed.
        with torch.no_grad():
            output_ids = self.model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=self.max_new_tokens,
                do_sample=False,  # greedy decoding — deterministic output
                temperature=None,  # must be None when do_sample=False
                top_p=None,  # must be None when do_sample=False
                pad_token_id=self.tokenizer.pad_token_id,
                eos_token_id=self.tokenizer.eos_token_id,
            )

        # Decode only the new tokens — strip the input prompt.
        # skip_special_tokens=True removes <|im_start|>, <|endoftext|> etc.
        # so the parser receives clean text without special token artifacts
        # that could corrupt JSON extraction.
        new_tokens = output_ids[0][input_ids.shape[-1] :]
        return str(self.tokenizer.decode(new_tokens, skip_special_tokens=True))

generate

generate(messages)

Run inference on a list of chat messages and return the raw output.

Uses greedy decoding (do_sample=False) for deterministic, consistent JSON output across runs. Works with any HuggingFace causal LM.

Parameters:

Name Type Description Default
messages list[dict]

List of {"role": ..., "content": ...} dicts. Should include system prompt and user message.

required

Returns:

Type Description
str

Raw string output from the model with the input prompt stripped. Special tokens are removed for clean downstream parsing.

Examples:

>>> model = UnBiasModel()
>>> msgs = [{"role": "user", "content": "..."}]
>>> output = model.generate(msgs)
>>> isinstance(output, str)
True
Source code in src/unbias_plus/model.py
def generate(self, messages: list[dict]) -> str:
    """Run inference on a list of chat messages and return the raw output.

    Uses greedy decoding (do_sample=False) for deterministic, consistent
    JSON output across runs. Works with any HuggingFace causal LM.

    Parameters
    ----------
    messages : list[dict]
        List of {"role": ..., "content": ...} dicts.
        Should include system prompt and user message.

    Returns
    -------
    str
        Raw string output from the model with the input prompt stripped.
        Special tokens are removed for clean downstream parsing.

    Examples
    --------
    >>> model = UnBiasModel()  # doctest: +SKIP
    >>> msgs = [{"role": "user", "content": "..."}]  # doctest: +SKIP
    >>> output = model.generate(msgs)  # doctest: +SKIP
    >>> isinstance(output, str)  # doctest: +SKIP
    True
    """
    # Build template kwargs as a literal — only pass thinking args when
    # explicitly enabled so the code works with any HF model, not just Qwen3.
    # enable_thinking is always passed explicitly (even as False) so
    # Qwen3's jinja template doesn't fall back to its own default of True.
    template_kwargs: dict = {
        "tokenize": True,
        "add_generation_prompt": True,
        "return_tensors": "pt",
        "return_dict": True,
        "truncation": True,
        "max_length": MAX_SEQ_LENGTH,
        # Always set enable_thinking explicitly for Qwen3 models so the
        # jinja template respects our setting rather than its own default.
        # For non-Qwen3 models this key is simply ignored by the tokenizer.
        "enable_thinking": self.enable_thinking,
    }
    if self.enable_thinking:
        template_kwargs["thinking_budget"] = self.thinking_budget

    tokenized = self.tokenizer.apply_chat_template(messages, **template_kwargs)

    # Move the tokenized prompt onto the model's device before generating.
    input_ids = tokenized["input_ids"].to(self.device)
    attention_mask = tokenized["attention_mask"].to(self.device)

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        output_ids = self.model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=self.max_new_tokens,
            do_sample=False,  # greedy decoding — deterministic output
            temperature=None,  # must be None when do_sample=False
            top_p=None,  # must be None when do_sample=False
            pad_token_id=self.tokenizer.pad_token_id,
            eos_token_id=self.tokenizer.eos_token_id,
        )

    # Decode only the new tokens — strip the input prompt.
    # skip_special_tokens=True removes <|im_start|>, <|endoftext|> etc.
    # so the parser receives clean text without special token artifacts
    # that could corrupt JSON extraction.
    new_tokens = output_ids[0][input_ids.shape[-1] :]
    return str(self.tokenizer.decode(new_tokens, skip_special_tokens=True))

Schema

unbias_plus.schema

Data schemas for unbias-plus output.

BiasedSegment

Bases: BaseModel

A single biased segment detected in the text.

Attributes:

Name Type Description
original str

The original biased phrase from the input text.

replacement str

The suggested neutral replacement. Defaults to empty string if the model omits it (e.g. under 4-bit quantization).

severity str

Severity level: 'low', 'medium', or 'high'. Defaults to 'medium' if omitted by the model.

bias_type str

Type of bias (e.g. 'loaded language', 'framing bias').

reasoning str

Explanation of why this segment is considered biased.

start int | None

Character offset start in the original text. Computed by the pipeline after parsing.

end int | None

Character offset end in the original text. Computed by the pipeline after parsing.

Examples:

>>> seg = BiasedSegment(
...     original="Sharia-obsessed fanatics",
...     replacement="extremist groups",
...     severity="high",
...     bias_type="dehumanizing framing",
...     reasoning="Uses inflammatory religious language.",
... )
>>> seg.severity
'high'
Source code in src/unbias_plus/schema.py
class BiasedSegment(BaseModel):
    """A single biased segment detected in the input text.

    Attributes
    ----------
    original : str
        The biased phrase exactly as it appears in the input text.
    replacement : str
        Suggested neutral replacement. Empty string when the model
        omits it (e.g. under 4-bit quantization).
    severity : str
        Segment severity: 'low', 'medium', or 'high'. Falls back to
        'medium' when the model omits or mangles the value.
    bias_type : str
        Category of bias (e.g. 'loaded language', 'framing bias').
    reasoning : str
        Explanation of why this segment is considered biased.
    start : int | None
        Character offset of the segment start in the original text.
        Filled in by the pipeline after parsing.
    end : int | None
        Character offset of the segment end in the original text.
        Filled in by the pipeline after parsing.

    Examples
    --------
    >>> seg = BiasedSegment(
    ...     original="Sharia-obsessed fanatics",
    ...     replacement="extremist groups",
    ...     severity="high",
    ...     bias_type="dehumanizing framing",
    ...     reasoning="Uses inflammatory religious language.",
    ... )
    >>> seg.severity
    'high'

    """

    original: str
    replacement: str = ""  # model may leave this out (e.g. 4-bit quantization)
    severity: str = "medium"  # fallback when the model omits the field
    bias_type: str = ""
    reasoning: str = ""
    start: int | None = None
    end: int | None = None

    @field_validator("severity")
    @classmethod
    def validate_severity(cls, v: str) -> str:
        """Validate and normalise segment severity to low/medium/high."""
        normalized = v.lower().strip()
        if normalized in {"low", "medium", "high"}:
            return normalized
        # Degrade gracefully instead of raising — malformed model output
        # should never fail schema validation outright.
        logger.warning(
            "Unexpected segment severity '%s', defaulting to 'medium'", v
        )
        return "medium"

validate_severity classmethod

validate_severity(v)

Validate and normalise segment severity to low/medium/high.

Source code in src/unbias_plus/schema.py
@field_validator("severity")
@classmethod
def validate_severity(cls, v: str) -> str:
    """Validate and normalise segment severity to low/medium/high."""
    allowed = {"low", "medium", "high"}
    # Case-insensitive, whitespace-tolerant comparison.
    normalized = v.lower().strip()
    if normalized not in allowed:
        # Degrade gracefully instead of raising — the model sometimes
        # emits values outside the documented scale, and validation
        # should not fail the whole result over one bad severity.
        logger.warning(
            "Unexpected segment severity '%s', defaulting to 'medium'", v
        )
        return "medium"
    return normalized

BiasResult

Bases: BaseModel

Full bias analysis result for an input text.

Attributes:

Name Type Description
binary_label str

Overall label: 'biased' or 'unbiased'.

severity int

Overall severity score: 0 = neutral / no bias; 2 = recurring biased framing; 3 = strong persuasive tone; 4 = inflammatory rhetoric. If the model returns a string ('low', 'medium', 'high'), it is coerced to the nearest integer value.

bias_found bool

Whether any bias was detected in the text.

biased_segments list[BiasedSegment]

List of biased segments found in the text, each with character-level start/end offsets.

unbiased_text str

Full neutral rewrite of the input text.

original_text str | None

The original input text. Set by the pipeline.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="A neutral version of the text.",
... )
>>> result.binary_label
'biased'
Source code in src/unbias_plus/schema.py
class BiasResult(BaseModel):
    """Full bias analysis result for an input text.

    Attributes
    ----------
    binary_label : str
        Overall label: 'biased' or 'unbiased'.
    severity : int
        Overall severity score on the scale
        0 = neutral / no bias, 2 = recurring biased framing,
        3 = strong persuasive tone, 4 = inflammatory rhetoric.
        String values ('low', 'medium', 'high') from the model are
        coerced to the nearest integer value.
    bias_found : bool
        Whether any bias was detected in the text.
    biased_segments : list[BiasedSegment]
        Biased segments found in the text, each carrying
        character-level start/end offsets.
    unbiased_text : str
        Full neutral rewrite of the input text.
    original_text : str | None
        The original input text. Set by the pipeline.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="A neutral version of the text.",
    ... )
    >>> result.binary_label
    'biased'

    """

    binary_label: str
    severity: int
    bias_found: bool
    biased_segments: list[BiasedSegment]
    unbiased_text: str
    original_text: str | None = None

    @field_validator("binary_label")
    @classmethod
    def validate_binary_label(cls, v: str) -> str:
        """Validate binary_label is 'biased' or 'unbiased'."""
        allowed = {"biased", "unbiased"}
        normalized = v.strip().lower()
        if normalized in allowed:
            return normalized
        raise ValueError(f"binary_label must be one of {allowed}, got '{v}'")

    @field_validator("severity", mode="before")
    @classmethod
    def validate_severity(cls, v: int | str) -> int:
        """Coerce and validate global severity.

        Accepts:
          - int 0, 2, 3, 4  (correct model output)
          - str 'low', 'medium', 'high', 'none'  (model confused scales)
          - any other int   (clamped to nearest valid value)
        """
        if isinstance(v, str):
            # The model sometimes confuses the global (int) scale with
            # the per-segment (low/medium/high) scale.
            key = v.lower().strip()
            if key in _STR_TO_INT_SEVERITY:
                coerced = _STR_TO_INT_SEVERITY[key]
                logger.warning(
                    "Global severity returned as string '%s', coerced to %d",
                    v,
                    coerced,
                )
                return coerced
            # It may also be an integer rendered as a string, e.g. "3".
            try:
                v = int(v)
            except ValueError:
                logger.warning("Unrecognized severity '%s', defaulting to 2", v)
                return 2

        # Clamp integers onto the valid scale {0, 2, 3, 4}.
        if v <= 0:
            return 0
        if v == 1:
            return 2
        return min(v, 4)

validate_binary_label classmethod

validate_binary_label(v)

Validate binary_label is 'biased' or 'unbiased'.

Source code in src/unbias_plus/schema.py
@field_validator("binary_label")
@classmethod
def validate_binary_label(cls, v: str) -> str:
    """Validate binary_label is 'biased' or 'unbiased'."""
    allowed = {"biased", "unbiased"}
    # Case-insensitive, whitespace-tolerant comparison.
    normalized = v.lower().strip()
    if normalized not in allowed:
        # Unlike segment severity, an unknown label is a hard error —
        # there is no sensible fallback between the two classes.
        raise ValueError(f"binary_label must be one of {allowed}, got '{v}'")
    return normalized

validate_severity classmethod

validate_severity(v)

Coerce and validate global severity.

Accepts: - int 0, 2, 3, 4 (correct model output) - str 'low', 'medium', 'high', 'none' (model confused scales) - any other int (clamped to nearest valid value)

Source code in src/unbias_plus/schema.py
@field_validator("severity", mode="before")
@classmethod
def validate_severity(cls, v: int | str) -> int:
    """Coerce and validate global severity.

    Accepts:
      - int 0, 2, 3, 4  (correct model output)
      - str 'low', 'medium', 'high', 'none'  (model confused scales)
      - any other int   (clamped to nearest valid value)
    """
    # String coercion — model confused global vs segment severity scale
    if isinstance(v, str):
        normalized = v.lower().strip()
        if normalized in _STR_TO_INT_SEVERITY:
            coerced = _STR_TO_INT_SEVERITY[normalized]
            logger.warning(
                "Global severity returned as string '%s', coerced to %d",
                v,
                coerced,
            )
            return coerced
        # Try parsing as int string e.g. "3"
        try:
            v = int(v)
        except ValueError:
            # Unparseable — fall back to the mildest non-zero level.
            logger.warning("Unrecognized severity '%s', defaulting to 2", v)
            return 2

    # Clamp out-of-range integer values gracefully.
    # Valid scale is {0, 2, 3, 4}; 1 is not a defined level.
    if v <= 0:
        return 0
    if v in {2, 3, 4}:
        return v
    if v == 1:
        return 2
    return 4  # anything > 4

compute_offsets

compute_offsets(original_text, segments)

Compute character start/end offsets for each biased segment.

Walks the original text with a cursor so that duplicate phrases are matched in order of appearance, not just the first occurrence.

Parameters:

Name Type Description Default
original_text str

The original input text.

required
segments list[BiasedSegment]

Parsed segments from the LLM (without offsets).

required

Returns:

Type Description
list[BiasedSegment]

Segments with start/end fields populated, sorted by start offset.

Source code in src/unbias_plus/schema.py
def compute_offsets(
    original_text: str, segments: list[BiasedSegment]
) -> list[BiasedSegment]:
    """Compute character start/end offsets for each biased segment.

    A cursor walks forward through the original text so that repeated
    phrases are matched in order of appearance rather than always
    hitting the first occurrence.

    Parameters
    ----------
    original_text : str
        The original input text.
    segments : list[BiasedSegment]
        Parsed segments from the LLM (without offsets).

    Returns
    -------
    list[BiasedSegment]
        Segments with start/end fields populated, sorted by start offset.

    """
    enriched: list[BiasedSegment] = []
    search_from = 0

    for segment in segments:
        needle = segment.original
        if not needle:
            continue

        # Prefer a match at or after the cursor; fall back to a global
        # search so out-of-order model output still gets offsets.
        position = _find_case_insensitive(original_text, needle, search_from)
        if position == -1:
            position = _find_case_insensitive(original_text, needle, 0)

        if position == -1:
            # Keep the segment (offsets stay None) rather than dropping it.
            logger.warning("Could not find segment in text: '%s'", needle)
            enriched.append(segment)
            continue

        stop = position + len(needle)
        enriched.append(segment.model_copy(update={"start": position, "end": stop}))
        search_from = stop

    enriched.sort(key=lambda s: 0 if s.start is None else s.start)
    return enriched

API (FastAPI server)

unbias_plus.api

FastAPI server for unbias-plus.

AnalyzeRequest

Bases: BaseModel

Request body for the analyze endpoint.

Attributes:

Name Type Description
text str

The input text to analyze for bias.

Source code in src/unbias_plus/api.py
class AnalyzeRequest(BaseModel):
    """Request body for the analyze endpoint.

    Attributes
    ----------
    text : str
        The input text to analyze for bias.
    """

    # Plain string payload; no additional validation is applied here.
    text: str

HealthResponse

Bases: BaseModel

Response body for the health endpoint.

Attributes:

Name Type Description
status str

Server status string.

model str

Currently loaded model name or path.

Source code in src/unbias_plus/api.py
class HealthResponse(BaseModel):
    """Response body for the health endpoint.

    Attributes
    ----------
    status : str
        Server status string.
    model : str
        Currently loaded model name or path.
    """

    # 'ok' when the server is up; model is 'not loaded' before startup
    # completes (see the /health handler).
    status: str
    model: str

lifespan async

lifespan(app)

Load the model on startup and release on shutdown.

Parameters:

Name Type Description Default
app FastAPI

The FastAPI application instance.

required

Yields:

Type Description
None
Source code in src/unbias_plus/api.py
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Load the model on startup and release it on shutdown.

    Parameters
    ----------
    app : FastAPI
        The FastAPI application instance.

    Yields
    ------
    None

    """
    # Configuration is stashed on app.state by serve(); fall back to
    # package defaults when the server is started another way.
    pipeline = UnBiasPlus(
        model_name_or_path=getattr(app.state, "model_name_or_path", DEFAULT_MODEL),
        load_in_4bit=getattr(app.state, "load_in_4bit", False),
    )
    app.state.pipe = pipeline
    yield
    # Drop the reference on shutdown so the model can be released.
    app.state.pipe = None

index

index()

Serve the demo UI.

Returns:

Type Description
str

HTML content of the demo page.

Raises:

Type Description
HTTPException

404 if the demo directory is not found.

Source code in src/unbias_plus/api.py
@app.get("/", response_class=HTMLResponse)
def index() -> str:
    """Serve the demo UI.

    Returns
    -------
    str
        HTML content of the demo page.

    Raises
    ------
    HTTPException
        404 if the demo directory is not found.

    """
    page = DEMO_DIR / "templates" / "index.html"
    if page.exists():
        return page.read_text()
    raise HTTPException(status_code=404, detail="Demo UI not found.")

health

health(request)

Check if the server and model are ready.

Returns:

Type Description
HealthResponse

Server status and loaded model name.

Source code in src/unbias_plus/api.py
@app.get("/health", response_model=HealthResponse)
def health(request: Request) -> HealthResponse:
    """Check if the server and model are ready.

    Returns
    -------
    HealthResponse
        Server status and loaded model name.

    """
    pipe = getattr(request.app.state, "pipe", None)
    if pipe is None:
        model_name = "not loaded"
    else:
        # NOTE(review): reaches into the pipeline's private `_model`
        # attribute — consider exposing a public accessor on UnBiasPlus.
        model_name = str(pipe._model.model_name_or_path)
    return HealthResponse(status="ok", model=model_name)

analyze

analyze(request, body)

Analyze input text for bias.

Parameters:

Name Type Description Default
request Request

FastAPI request (for app state).

required
body AnalyzeRequest

Request body containing the text to analyze.

required

Returns:

Type Description
BiasResult

Structured bias analysis result with character offsets.

Raises:

Type Description
HTTPException

500 if the model is not loaded or inference fails.

HTTPException

422 if the model output cannot be parsed.

Source code in src/unbias_plus/api.py
@app.post("/analyze", response_model=BiasResult)
def analyze(request: Request, body: AnalyzeRequest) -> BiasResult:
    """Analyze input text for bias.

    Parameters
    ----------
    request : Request
        FastAPI request (for app state).
    body : AnalyzeRequest
        Request body containing the text to analyze.

    Returns
    -------
    BiasResult
        Structured bias analysis result with character offsets.

    Raises
    ------
    HTTPException
        500 if the model is not loaded or inference fails.
    HTTPException
        422 if the model output cannot be parsed.

    """
    pipe = getattr(request.app.state, "pipe", None)
    if pipe is None:
        raise HTTPException(status_code=500, detail="Model not loaded.")
    try:
        result = pipe.analyze(body.text)
    except ValueError as exc:
        # Parse failures surface as ValueError from the pipeline.
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    return cast(BiasResult, result)

serve

serve(
    model_name_or_path=DEFAULT_MODEL,
    host="0.0.0.0",
    port=8000,
    load_in_4bit=False,
    reload=False,
)

Start the unbias-plus API server with the demo UI.

Loads the model and starts a uvicorn server. The demo UI is served at http://localhost:{port}/ and the API is at http://localhost:{port}/analyze.

Parameters:

Name Type Description Default
model_name_or_path str | Path

HuggingFace model ID or local path to the model.

DEFAULT_MODEL
host str

Host address to bind to. Default is '0.0.0.0'.

'0.0.0.0'
port int

Port to listen on. Default is 8000.

8000
load_in_4bit bool

Load model in 4-bit quantization. Default is False.

False
reload bool

Enable auto-reload on code changes. Default is False.

False

Examples:

>>> from unbias_plus.api import serve
>>> serve("Qwen/Qwen3-4B", port=8000)
Source code in src/unbias_plus/api.py
def serve(
    model_name_or_path: str | Path = DEFAULT_MODEL,
    host: str = "0.0.0.0",
    port: int = 8000,
    load_in_4bit: bool = False,
    reload: bool = False,
) -> None:
    """Start the unbias-plus API server with the demo UI.

    Stashes the model configuration on ``app.state`` (picked up by the
    lifespan handler) and runs uvicorn. The demo UI is served at
    http://localhost:{port}/ and the API at http://localhost:{port}/analyze.

    Parameters
    ----------
    model_name_or_path : str | Path
        HuggingFace model ID or local path to the model.
    host : str
        Host address to bind to. Default is '0.0.0.0'.
    port : int
        Port to listen on. Default is 8000.
    load_in_4bit : bool
        Load model in 4-bit quantization. Default is False.
    reload : bool
        Enable auto-reload on code changes. Default is False.

    Examples
    --------
    >>> from unbias_plus.api import serve
    >>> serve("Qwen/Qwen3-4B", port=8000)  # doctest: +SKIP

    """
    # The lifespan handler reads these two attributes on startup.
    app.state.load_in_4bit = load_in_4bit
    app.state.model_name_or_path = str(model_name_or_path)
    print(f"Starting unbias-plus server at http://localhost:{port}")
    uvicorn.run(app, host=host, port=port, reload=reload)

CLI

unbias_plus.cli

CLI entry point for unbias-plus.

parse_args

parse_args()

Parse CLI arguments.

Returns:

Type Description
Namespace

Parsed arguments.

Source code in src/unbias_plus/cli.py
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments.

    Defines three mutually exclusive input modes (--text, --file,
    --serve) plus shared model/server options.

    Returns
    -------
    argparse.Namespace
        Parsed arguments.

    """
    parser = argparse.ArgumentParser(
        prog="unbias-plus",
        description="Detect and debias text using a single LLM.",
    )

    # Exactly one input mode may be given; "none given" is validated
    # later in main() so --serve can remain optional here.
    input_group = parser.add_mutually_exclusive_group()
    input_group.add_argument(
        "--text",
        type=str,
        help="Text string to analyze.",
    )
    input_group.add_argument(
        "--file",
        type=str,
        help="Path to a .txt file to analyze.",
    )
    input_group.add_argument(
        "--serve",
        action="store_true",
        default=False,
        help="Start the FastAPI server.",
    )

    parser.add_argument(
        "--model",
        type=str,
        default=DEFAULT_MODEL,
        help=f"HuggingFace model ID or local path. Default: {DEFAULT_MODEL}",
    )
    parser.add_argument(
        "--load-in-4bit",
        action="store_true",
        default=False,
        help="Load model in 4-bit quantization to reduce VRAM usage.",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        default=False,
        help="Output result as raw JSON instead of formatted CLI display.",
    )
    parser.add_argument(
        "--max-new-tokens",
        type=int,
        default=2048,
        # BUGFIX: the help text hard-coded "Default: 1024" while the actual
        # default is 2048. %(default)s lets argparse substitute the real
        # value, so the text can never drift out of sync again.
        help="Maximum number of tokens to generate. Default: %(default)s",
    )
    parser.add_argument(
        "--host",
        type=str,
        default="0.0.0.0",
        help="Host for the API server. Default: 0.0.0.0",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=8000,
        help="Port for the API server. Default: 8000",
    )

    return parser.parse_args()

main

main()

Run the unbias-plus CLI.

Examples:

$ unbias-plus --text "Women are too emotional to lead."
$ unbias-plus --file article.txt --json
$ unbias-plus --serve --model path/to/model --port 8000
$ unbias-plus --serve --load-in-4bit

Source code in src/unbias_plus/cli.py
def main() -> None:
    """Run the unbias-plus CLI.

    Dispatches to the API server (--serve) or runs a one-shot analysis
    of --text / --file input, printing JSON or a formatted report.

    Examples
    --------
    $ unbias-plus --text "Women are too emotional to lead."
    $ unbias-plus --file article.txt --json
    $ unbias-plus --serve --model path/to/model --port 8000
    $ unbias-plus --serve --load-in-4bit

    """
    args = parse_args()

    if args.serve:
        serve(
            model_name_or_path=args.model,
            host=args.host,
            port=args.port,
            load_in_4bit=args.load_in_4bit,
        )
        return

    if not args.text and not args.file:
        print(
            "Error: one of --text, --file, or --serve is required.",
            file=sys.stderr,
        )
        sys.exit(1)

    if args.file:
        try:
            # Read as UTF-8 explicitly: the platform default encoding
            # (e.g. cp1252 on Windows) would corrupt non-ASCII text.
            with open(args.file, encoding="utf-8") as f:
                text = f.read()
        except FileNotFoundError:
            print(f"Error: file '{args.file}' not found.", file=sys.stderr)
            sys.exit(1)
        except OSError as e:
            # Permission errors, directories, etc. — fail with a clean
            # message instead of a traceback.
            print(f"Error: could not read '{args.file}': {e}", file=sys.stderr)
            sys.exit(1)
    else:
        text = args.text

    pipe = UnBiasPlus(
        model_name_or_path=args.model,
        load_in_4bit=args.load_in_4bit,
        max_new_tokens=args.max_new_tokens,
    )

    if args.json:
        print(pipe.analyze_to_json(text))
    else:
        print(pipe.analyze_to_cli(text))

Prompt

unbias_plus.prompt

Prompt templates for the unbias-plus LLM.

build_messages

build_messages(text)

Build the chat messages list for the LLM given input text.

Formats the system prompt and user text into the messages format required by the model's chat template.

Parameters:

Name Type Description Default
text str

The input text to analyze for bias.

required

Returns:

Type Description
list[dict]

List of {"role": ..., "content": ...} dicts ready for tokenizer.apply_chat_template().

Examples:

>>> messages = build_messages("Women are too emotional to lead.")
>>> messages[0]["role"]
'system'
>>> messages[1]["role"]
'user'
>>> "Women are too emotional to lead." in messages[1]["content"]
True
Source code in src/unbias_plus/prompt.py
def build_messages(text: str) -> list[dict]:
    """Build the chat messages list for the LLM given input text.

    Wraps the system prompt and the user's text in the
    {"role": ..., "content": ...} message format expected by
    tokenizer.apply_chat_template().

    Parameters
    ----------
    text : str
        The input text to analyze for bias.

    Returns
    -------
    list[dict]
        Two messages: the system prompt followed by the user request
        embedding the input text.

    Examples
    --------
    >>> messages = build_messages("Women are too emotional to lead.")
    >>> messages[0]["role"]
    'system'
    >>> messages[1]["role"]
    'user'
    >>> "Women are too emotional to lead." in messages[1]["content"]
    True
    """
    user_content = (
        "Analyze the following text for bias and return the result "
        "in the required JSON format.\n\n"
        f"TEXT:\n{text}"
    )
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_content},
    ]

Parser

unbias_plus.parser

Parser for LLM JSON output into BiasResult objects.

parse_llm_output

parse_llm_output(raw_output)

Parse raw LLM output string into a BiasResult object.

Handles Qwen3 thinking blocks (&lt;think&gt;...&lt;/think&gt;) as well as plain JSON output from any model. Attempts multiple strategies to extract and parse a JSON object from the raw LLM output, then validates it against the BiasResult schema.

Strategies (in order): 1. Extract JSON by brace counting — stops at closing } so any hallucinated text after the JSON block is ignored entirely. 2. Strip thinking block from extracted text if present. 3. Direct JSON parse of extracted block. 4. Fix truncated strings (LLM cut off mid-output). 5. Fix missing commas between JSON items. 6. Aggressive key-by-key extraction as last resort.

Parameters:

Name Type Description Default
raw_output str

Raw string returned by the LLM, may include a thinking block, extra text, markdown code fences, or be truncated/malformed.

required

Returns:

Type Description
BiasResult

Validated and structured bias analysis result.

Raises:

Type Description
ValueError

If the output cannot be parsed as valid JSON or does not match the expected BiasResult schema after all repair attempts.

Examples:

>>> raw = '''
... {
...   "binary_label": "biased",
...   "severity": 3,
...   "bias_found": true,
...   "biased_segments": [],
...   "unbiased_text": "A neutral version."
... }
... '''
>>> result = parse_llm_output(raw)
>>> result.binary_label
'biased'
Source code in src/unbias_plus/parser.py
def parse_llm_output(raw_output: str) -> BiasResult:
    """Parse raw LLM output string into a BiasResult object.

    Handles Qwen3 thinking blocks (<think>...</think>) as well as
    plain JSON output from any model. Tries a ladder of extraction
    and repair strategies, then validates the result against the
    BiasResult schema.

    Strategies (in order):
    1. Extract JSON by brace counting — stops at the closing } so any
       hallucinated text after the JSON block is ignored entirely.
    2. Strip thinking block from extracted text if present.
    3. Direct JSON parse of extracted block.
    4. Fix truncated strings (LLM cut off mid-output).
    5. Fix missing commas between JSON items.
    6. Aggressive key-by-key extraction as last resort.

    Parameters
    ----------
    raw_output : str
        Raw string returned by the LLM, may include a thinking block,
        extra text, markdown code fences, or be truncated/malformed.

    Returns
    -------
    BiasResult
        Validated and structured bias analysis result.

    Raises
    ------
    ValueError
        If the output cannot be parsed as valid JSON or does
        not match the expected BiasResult schema after all
        repair attempts.

    Examples
    --------
    >>> raw = '''
    ... {
    ...   "binary_label": "biased",
    ...   "severity": 3,
    ...   "bias_found": true,
    ...   "biased_segments": [],
    ...   "unbiased_text": "A neutral version."
    ... }
    ... '''
    >>> result = parse_llm_output(raw)
    >>> result.binary_label
    'biased'

    """
    # Brace-counted extraction first: it stops at the closing } of the
    # root object, so hallucinated trailing text (e.g. a fake new turn)
    # never reaches the parser.
    extracted = _extract_json(raw_output)

    # Strip the thinking block *after* extraction — a <think> tag
    # hallucinated after the JSON then cannot empty the string.
    # No-op when no thinking block is present.
    text = _strip_thinking_block(extracted)

    # Repair ladder, cheapest first. Each candidate is built lazily so a
    # repair function only runs when the earlier attempts have failed.
    repair_ladder = (
        lambda: text,
        lambda: _fix_truncated_json(text),
        lambda: _fix_missing_commas(text),
        lambda: _fix_missing_commas(_fix_truncated_json(text)),
    )
    data = None
    for make_candidate in repair_ladder:
        data = _try_parse(make_candidate())
        if data is not None:
            break

    # Last resort: pull fields out one by one with regexes.
    if data is None:
        data = _extract_fields_by_regex(text)

    if data is None:
        raise ValueError(
            f"LLM output could not be parsed as JSON after all repair attempts.\n"
            f"Raw output:\n{raw_output}"
        )

    # Deduplicate segments with the same original phrase before schema validation
    if "biased_segments" in data and isinstance(data["biased_segments"], list):
        data["biased_segments"] = _deduplicate_segments(data["biased_segments"])

    try:
        return BiasResult(**data)
    except Exception as e:
        raise ValueError(
            f"LLM JSON does not match expected schema.\nData: {data}\nError: {e}"
        ) from e

Formatter

unbias_plus.formatter

Formatters for displaying BiasResult output.

format_cli

format_cli(result)

Format a BiasResult for CLI terminal display.

Produces a human-readable, colored terminal output showing the bias label, severity, each biased segment with its replacement and reasoning, and the full unbiased rewrite.

Parameters:

Name Type Description Default
result BiasResult

The bias analysis result to format.

required

Returns:

Type Description
str

A human-readable colored string for terminal output.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="Neutral.",
... )
>>> output = format_cli(result)
>>> isinstance(output, str)
True
Source code in src/unbias_plus/formatter.py
def format_cli(result: BiasResult) -> str:
    """Format a BiasResult for CLI terminal display.

    Builds a human-readable, colored terminal report: header with the
    segment count, one entry per biased segment (colored severity,
    original phrase, replacement, bias type, reasoning), and the full
    neutral rewrite.

    Parameters
    ----------
    result : BiasResult
        The bias analysis result to format.

    Returns
    -------
    str
        A human-readable colored string for terminal output.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="Neutral.",
    ... )
    >>> output = format_cli(result)
    >>> isinstance(output, str)
    True

    """
    rule = "=" * 60
    out: list[str] = [rule]
    if result.bias_found:
        out.append(f"Segments found: {len(result.biased_segments)}")
    if not result.biased_segments:
        out.append("\nNo biased segments detected.")
    out.append(rule)

    if result.biased_segments:
        out.append("\nBIASED SEGMENTS:")
        reset = _SEVERITY_COLORS["reset"]
        for idx, segment in enumerate(result.biased_segments, 1):
            # Unknown severities get no color rather than a KeyError.
            color = _SEVERITY_COLORS.get(segment.severity, "")
            out.append(f"\n  [{idx}] {color}{segment.severity.upper()}{reset}")
            out.append(f'  Original  : "{segment.original}"')
            out.append(f'  Replace   : "{segment.replacement}"')
            out.append(f"  Bias type : {segment.bias_type}")
            out.append(f"  Reasoning : {segment.reasoning}")

    out.append("\n" + "-" * 60)
    out.append("NEUTRAL REWRITE:")
    out.append(result.unbiased_text)
    out.append(rule)

    return "\n".join(out)

format_dict

format_dict(result)

Convert a BiasResult to a plain Python dictionary.

Parameters:

Name Type Description Default
result BiasResult

The bias analysis result to convert.

required

Returns:

Type Description
dict

Plain dictionary representation of the result.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="Neutral.",
... )
>>> d = format_dict(result)
>>> isinstance(d, dict)
True
Source code in src/unbias_plus/formatter.py
def format_dict(result: BiasResult) -> dict:
    """Convert a BiasResult to a plain Python dictionary.

    Thin wrapper around pydantic's ``model_dump`` so callers do not
    need to know the result is a pydantic model.

    Parameters
    ----------
    result : BiasResult
        The bias analysis result to convert.

    Returns
    -------
    dict
        Plain dictionary representation of the result.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="Neutral.",
    ... )
    >>> d = format_dict(result)
    >>> isinstance(d, dict)
    True

    """
    dumped = result.model_dump()
    return dumped

format_json

format_json(result)

Convert a BiasResult to a formatted JSON string.

Parameters:

Name Type Description Default
result BiasResult

The bias analysis result to convert.

required

Returns:

Type Description
str

Pretty-printed JSON string representation of the result.

Examples:

>>> result = BiasResult(
...     binary_label="biased",
...     severity=3,
...     bias_found=True,
...     biased_segments=[],
...     unbiased_text="Neutral.",
... )
>>> json_str = format_json(result)
>>> isinstance(json_str, str)
True
Source code in src/unbias_plus/formatter.py
def format_json(result: BiasResult) -> str:
    """Convert a BiasResult to a formatted JSON string.

    Serializes the pydantic model to a dict first, then pretty-prints
    it with two-space indentation.

    Parameters
    ----------
    result : BiasResult
        The bias analysis result to convert.

    Returns
    -------
    str
        Pretty-printed JSON string representation of the result.

    Examples
    --------
    >>> result = BiasResult(
    ...     binary_label="biased",
    ...     severity=3,
    ...     bias_found=True,
    ...     biased_segments=[],
    ...     unbiased_text="Neutral.",
    ... )
    >>> json_str = format_json(result)
    >>> isinstance(json_str, str)
    True

    """
    payload = result.model_dump()
    return json.dumps(payload, indent=2)