Search + K

Command Palette

Search for a command to run...

Sign In

Create an Azure AI Studio inference endpoint

PUT /_inference/{task_type}/{azureaistudio_inference_id}
Copy endpoint

Create an inference endpoint to perform an inference task with the azureaistudio service.

Required authorization

  • Cluster privileges: manage_inference

Parameters

Path Parameters

Name Type
task_type required

The type of the inference task that the model will perform.

type InferenceTypesAzureAiStudioTaskType = "completion" | "rerank" | "text_embedding"
azureaistudio_inference_id required

The unique identifier of the inference endpoint.

type TypesId = string

Query Parameters

Name Type
timeout

Specifies the amount of time to wait for the inference endpoint to be created.

type TypesDuration = string | "-1" | "0"

Request Body

application/json required
{
chunking_settings?: InferenceTypesInferenceChunkingSettings

Chunking configuration object

interface InferenceTypesInferenceChunkingSettings {
max_chunk_size?: number;
overlap?: number;
sentence_overlap?: number;
separator_group?: string;
separators?: string[];
strategy?: string;
}
;
service: InferenceTypesAzureAiStudioServiceType
type InferenceTypesAzureAiStudioServiceType = "azureaistudio"
;
service_settings: InferenceTypesAzureAiStudioServiceSettings
interface InferenceTypesAzureAiStudioServiceSettings {
api_key: string;
endpoint_type: string;
target: string;
provider: string;
rate_limit?: InferenceTypesRateLimitSetting;
}
;
task_settings?: InferenceTypesAzureAiStudioTaskSettings
interface InferenceTypesAzureAiStudioTaskSettings {
do_sample?: number;
max_new_tokens?: number;
temperature?: number;
top_p?: number;
user?: string;
return_documents?: boolean;
top_n?: number;
}
;
}

Responses

200 application/json
type InferenceTypesInferenceEndpointInfoAzureAIStudio = interface InferenceTypesInferenceEndpoint {
chunking_settings?: InferenceTypesInferenceChunkingSettings

Chunking configuration object

interface InferenceTypesInferenceChunkingSettings {
max_chunk_size?: number;
overlap?: number;
sentence_overlap?: number;
separator_group?: string;
separators?: string[];
strategy?: string;
}
;
service: string;
service_settings: InferenceTypesServiceSettings
interface InferenceTypesServiceSettings {}
;
task_settings?: InferenceTypesTaskSettings
interface InferenceTypesTaskSettings {}
;
}
& { inference_id: string;task_type: InferenceTypesTaskTypeAzureAIStudio
type InferenceTypesTaskTypeAzureAIStudio = "text_embedding" | "completion" | "rerank"
; }