xinference.client.Client

class xinference.client.Client(base_url, api_key: str | None = None)
__init__(base_url, api_key: str | None = None)

Methods

__init__(base_url[, api_key])

abort_cluster()

abort_request(model_uid, request_id[, ...])

Abort a request.

confirm_and_remove_model(model_version[, ...])

Remove the cached models with the given model version from the server.

describe_model(model_uid)

Get model information via RESTful APIs.

get_model(model_uid)

Get the launched model identified by model_uid from the server via RESTful APIs.

get_model_registration(model_type, model_name)

Get the model with the model type and model name registered on the server.

get_progress(request_id)

get_supervisor_info()

get_workers_info()

launch_model(model_name[, model_type, ...])

Launch the model based on the parameters on the server via RESTful APIs.

list_cached_models([model_name, worker_ip])

Get a list of cached models.

list_deletable_models(model_version[, worker_ip])

Get the paths of the cached models for the given model version on the server.

list_model_registrations(model_type)

List models registered on the server.

list_models()

Retrieve the model specifications from the server.

login(username, password)

query_engine_by_model_name(model_name)

Get the engine parameters for the model with the given name registered on the server.

register_model(model_type, model, persist[, ...])

Register a custom model.

terminate_model(model_uid)

Terminate the specific model running on the server.

unregister_model(model_type, model_name)

Unregister a custom model.

vllm_models()