Skip to content

Llm

Implementation of API for LLMs.

OpenAIClassifier(k_shot=None, prompt=None, openai_client=None, responses_kwargs=None, classes=None)

Bases: ClassifierMixin, MultiOutputMixin, BaseEstimator

OpenAI classifier.

A classifier that is based on the OpenAI API.

Parameters:

Name Type Description Default
k_shot int | ArrayLike | None

Number of examples to include in the prompt. If None, no examples are included. If an int, that many examples are selected at random. If an array of indices is provided, it is used to select the examples.

None
prompt str | None

Prompt for the OpenAI API.

None
openai_client str | OpenAI | AsyncOpenAI | None

OpenAI client.

None
responses_kwargs dict | None

Keyword arguments for the OpenAI API.

None
classes dict | ndarray | list[ndarray] | None

Class labels.

None

Attributes:

Name Type Description
k_shot_ int | dict[int, ArrayLike | RandomState] | None

Number of examples per class.

prompt_ str | None

Prompt for the OpenAI API.

openai_client_ OpenAI | AsyncOpenAI

OpenAI client.

responses_kwargs_ dict | None

Keyword arguments for the OpenAI API.

classes_ ndarray | list[ndarray] | None

Class labels.

instructions_ str

Instructions for the OpenAI API.

Source code in src/skai/llm/_openai.py
82
83
84
85
86
87
88
89
90
91
92
93
94
def __init__(
    self: Self,
    k_shot: int | ArrayLike | None = None,
    prompt: str | None = None,
    openai_client: str | openai.OpenAI | openai.AsyncOpenAI | None = None,
    responses_kwargs: dict | None = None,
    classes: dict | np.ndarray | list[np.ndarray] | None = None,
) -> None:
    """Record the constructor arguments on the instance.

    Every argument is stored unchanged under an attribute of the same name;
    nothing is validated or converted here.

    Args:
        k_shot:
            Number of examples to include in the prompt, or an array of
            indices selecting them. ``None`` means no examples.

        prompt:
            Prompt for the OpenAI API.

        openai_client:
            OpenAI client.

        responses_kwargs:
            Keyword arguments for the OpenAI API.

        classes:
            Class labels.
    """
    # Assignments follow the order of the signature.
    self.k_shot = k_shot
    self.prompt = prompt
    self.openai_client = openai_client
    self.responses_kwargs = responses_kwargs
    self.classes = classes

__sklearn_tags__()

Classifier tags.

Source code in src/skai/llm/_openai.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def __sklearn_tags__(self: Self) -> Tags:
    """Return the estimator tags with classifier and target tags replaced.

    The tags produced by the parent classes are taken as the starting point;
    their ``classifier_tags`` and ``target_tags`` entries are then overwritten
    with the values declared below.

    Returns:
        The tags object obtained from the parent classes, with both entries
        replaced.
    """
    estimator_tags = super().__sklearn_tags__()

    # Keyword settings for each tag group, kept next to each other for review.
    classifier_settings = {
        "poor_score": False,
        "multi_class": True,
        "multi_label": True,
    }
    target_settings = {
        "required": True,
        "multi_output": True,
        "single_output": True,
    }

    estimator_tags.classifier_tags = ClassifierTags(**classifier_settings)
    estimator_tags.target_tags = TargetTags(**target_settings)
    return estimator_tags

fit(X=None, y=None)

Fit the classifier to the training dataset.

Parameters:

Name Type Description Default
X ArrayLike | None

Input data.

None
y ArrayLike | None

Target values.

None

Returns:

Name Type Description
self Self

The fitted OpenAI classifier.

Source code in src/skai/llm/_openai.py
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X: ArrayLike | None = None, y: ArrayLike | None = None) -> Self:
    """Fit the classifier to the training dataset.

    The work is delegated to ``self._fit``; this method only forwards its
    arguments and hands back the result.

    Args:
        X:
            Input data. May be omitted.

        y:
            Target values. May be omitted.

    Returns:
        self:
            The fitted OpenAI classifier, as returned by ``self._fit``.
    """
    fitted = self._fit(X, y)
    return fitted

predict(X)

Predict the class labels for the provided data.

Parameters:

Name Type Description Default
X ArrayLike

Input data.

required

Returns:

Type Description
NDArray

The predicted class labels.

Source code in src/skai/llm/_openai.py
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
def predict(self, X: ArrayLike) -> NDArray:
    """Predict the class labels for the provided data.

    The input is converted to an array of strings and sent through the
    synchronous or the asynchronous prediction path, depending on the type
    of the fitted client. When ``classes`` was given as a dict, each raw
    prediction is matched against the dict values and translated back to the
    corresponding key.

    Args:
        X:
            Input data.

    Returns:
        The predicted class labels, as an array of dtype ``self.y_dtype_``.
    """
    check_is_fitted(
        self,
        ('k_shot_', 'openai_client_', 'responses_kwargs_', 'prompt_', 'classes_', 'instructions_'),
    )
    X = check_array(X, ensure_2d=False, dtype=str)
    if isinstance(self.openai_client_, openai.OpenAI):
        predictions = self._predict_sync(X)
    else:
        # NOTE(review): asyncio.run() raises RuntimeError when an event loop is
        # already running in this thread (e.g. in a notebook) - confirm callers.
        predictions = asyncio.run(self._predict_async(X.tolist()))
    if isinstance(self.classes, dict):
        # Invert the mapping so a predicted value can be looked up to get its key.
        label_by_name = {name: label for label, name in self.classes.items()}
        # Unrecognised answers fall back to the first key of the dict. The
        # previous fallback, ``self.classes[0]``, was evaluated for every
        # prediction, raised KeyError unless 0 happened to be a key, and
        # otherwise produced a dict value instead of a key.
        fallback = next(iter(self.classes), None)
        predictions = [label_by_name.get(pred.strip(), fallback) for pred in predictions]
    return np.array(predictions, dtype=self.y_dtype_)