HuggingfaceClient.Inference.Task.ChatCompletion
(huggingface_client v0.1.0)
Copy Markdown
View Source
Chat completion task — OpenAI-compatible /v1/chat/completions.
Non-streaming
{:ok, response} = HuggingfaceClient.chat_completion(client, %{
model: "meta-llama/Llama-3.1-8B-Instruct",
messages: [%{role: "user", content: "Hello!"}],
max_tokens: 512
})

Streaming
{:ok, stream} = HuggingfaceClient.chat_completion_stream(client, %{
model: "meta-llama/Llama-3.1-8B-Instruct",
messages: [%{role: "user", content: "Hello!"}]
})
for chunk <- stream do
IO.write(get_in(chunk, ["choices", Access.at(0), "delta", "content"]) || "")
end

Multi-provider
{:ok, response} = HuggingfaceClient.chat_completion(client, %{
model: "meta-llama/Llama-3.1-8B-Instruct",
provider: "groq",
messages: [%{role: "user", content: "Hi!"}]
})
Summary
Functions
@spec run(HuggingfaceClient.Client.t(), map()) :: {:ok, map()} | {:error, Exception.t()}
Runs a non-streaming chat completion.
Returns {:ok, response_map} or {:error, exception}.
@spec stream(HuggingfaceClient.Client.t(), map()) :: {:ok, Enumerable.t()} | {:error, Exception.t()}
Runs a streaming chat completion.
Returns {:ok, stream} where each element is a decoded chunk map, or {:error, exception}.