Create an Amazon SageMaker inference endpoint

PUT /_inference/{task_type}/{amazonsagemaker_inference_id}

Copy endpoint

Create an inference endpoint to perform an inference task with the amazon_sagemaker service.

Required authorization

Cluster privileges: manage_inference

Parameters

Path Parameters

Name	Type
`task_type` required The type of the inference task that the model will perform.	type InferenceTypesTaskTypeAmazonSageMaker = "text_embedding" | "completion" | "chat_completion" | "sparse_embedding" | "rerank"
`amazonsagemaker_inference_id` required The unique identifier of the inference endpoint.	type TypesId = string

Query Parameters

Name	Type
`timeout` Specifies the amount of time to wait for the inference endpoint to be created.	type TypesDuration = string | "-1" | "0"

Request Body

application/json required

{
chunking_settings?:

InferenceTypesInferenceChunkingSettings

Chunking configuration object

interface InferenceTypesInferenceChunkingSettings {
max_chunk_size?: number;
overlap?: number;
sentence_overlap?: number;
separator_group?: string;
separators?: string[];
strategy?: string;
}

;
service:

InferenceTypesAmazonSageMakerServiceType

type InferenceTypesAmazonSageMakerServiceType = "amazon_sagemaker"

;
service_settings:

InferenceTypesAmazonSageMakerServiceSettings

interface InferenceTypesAmazonSageMakerServiceSettings {
access_key: string;
endpoint_name: string;
api: InferenceTypesAmazonSageMakerApi;
region: string;
secret_key: string;
target_model?: string;
target_container_hostname?: string;
inference_component_name?: string;
batch_size?: number;
dimensions?: number;
}

;
task_settings?:

InferenceTypesAmazonSageMakerTaskSettings

interface InferenceTypesAmazonSageMakerTaskSettings {
custom_attributes?: string;
enable_explanations?: string;
inference_id?: string;
session_id?: string;
target_variant?: string;
}

;
}

Responses

200 application/json

type InferenceTypesInferenceEndpointInfoAmazonSageMaker = interface InferenceTypesInferenceEndpoint {
chunking_settings?:

InferenceTypesInferenceChunkingSettings

Chunking configuration object

interface InferenceTypesInferenceChunkingSettings {
max_chunk_size?: number;
overlap?: number;
sentence_overlap?: number;
separator_group?: string;
separators?: string[];
strategy?: string;
}

;
service: string;
service_settings:

InferenceTypesServiceSettings

interface InferenceTypesServiceSettings {}

;
task_settings?:

InferenceTypesTaskSettings

interface InferenceTypesTaskSettings {}

;
} & { inference_id: string; task_type:

InferenceTypesTaskTypeAmazonSageMaker

type InferenceTypesTaskTypeAmazonSageMaker = "text_embedding" | "completion" | "chat_completion" | "sparse_embedding" | "rerank"

; }