ExLLM Public API Reference
This document provides a comprehensive overview of ExLLM's public API. All functions listed here are accessed through the main ExLLM module.
Core Chat Functions
Basic Chat
# Simple chat completion
{:ok, response} = ExLLM.chat(:openai, "Hello, world!")
# With options
{:ok, response} = ExLLM.chat(:anthropic, "Explain quantum physics",
  model: "claude-3-opus",
  temperature: 0.7,
  max_tokens: 500
)
Streaming Chat
# Stream responses with a callback
{:ok, stream_id} = ExLLM.stream_chat(:openai, "Tell me a story",
  fn chunk ->
    IO.write(chunk.content)
  end
)
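If the full text is also needed once streaming finishes, the chunks can be accumulated as they arrive. A minimal sketch (the Agent buffer is illustrative, not part of ExLLM):
# Accumulate streamed chunks into a single string
{:ok, buffer} = Agent.start_link(fn -> "" end)

{:ok, _stream_id} = ExLLM.stream_chat(:openai, "Tell me a story",
  fn chunk ->
    IO.write(chunk.content)
    Agent.update(buffer, &(&1 <> chunk.content))
  end
)

# Once the stream has finished, read back the full text
full_text = Agent.get(buffer, & &1)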
Session Management
Creating and Using Sessions
# Create a new session
{:ok, session} = ExLLM.new_session(:anthropic,
  model: "claude-3-sonnet",
  system: "You are a helpful assistant"
)
# Chat with session (maintains conversation history)
{:ok, session, response} = ExLLM.chat_with_session(session, "What's the weather?")
# Get session history
messages = ExLLM.get_session_messages(session)
# Save/load sessions
{:ok, _} = ExLLM.save_session(session, "/path/to/session.json")
{:ok, session} = ExLLM.load_session("/path/to/session.json")
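Because chat_with_session/2 returns the updated session, a multi-turn conversation is a simple fold over user inputs, threading the session through each call:
{:ok, session} = ExLLM.new_session(:anthropic, model: "claude-3-sonnet")

# Each call appends both the user message and the assistant
# reply to the session's history
final_session =
  Enum.reduce(["Hi there!", "What did I just say?"], session, fn input, acc ->
    {:ok, updated, response} = ExLLM.chat_with_session(acc, input)
    IO.puts(response.content)
    updated
  end)

{:ok, _} = ExLLM.save_session(final_session, "/tmp/conversation.json")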
Model Information
Listing and Querying Models
# List available models for a provider
{:ok, models} = ExLLM.list_models(:openai)
# Get detailed model information
{:ok, info} = ExLLM.get_model_info(:anthropic, "claude-3-opus")
# Check model capabilities
true = ExLLM.model_supports?(:openai, "gpt-4-vision", :vision)
# Get default model
model = ExLLM.default_model(:openai)
Cost Tracking
Calculate and Estimate Costs
# Calculate actual cost from response
{:ok, cost} = ExLLM.calculate_cost(:openai, "gpt-4",
  %{input_tokens: 100, output_tokens: 200}
)
# Format cost for display
"$0.0045" = ExLLM.format_cost(cost)
# Estimate tokens before making request
token_count = ExLLM.estimate_tokens("This is my prompt")
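Combining estimate_tokens/1 with calculate_cost/3 gives a rough pre-flight estimate before any request is sent. A sketch, with an assumed figure for the reply length:
prompt = "Summarize the following article: ..."

# Estimate input tokens from the prompt text
input_tokens = ExLLM.estimate_tokens(prompt)

# Ballpark guess for the reply length (illustrative figure)
assumed_output_tokens = 300

{:ok, cost} = ExLLM.calculate_cost(:openai, "gpt-4",
  %{input_tokens: input_tokens, output_tokens: assumed_output_tokens}
)

IO.puts("Estimated cost: #{ExLLM.format_cost(cost)}")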
Context Management
Handle Context Windows
# Prepare messages to fit context window
{:ok, messages} = ExLLM.prepare_messages(messages, :openai,
  model: "gpt-4",
  strategy: :sliding_window
)
# Validate context size
{:ok, :valid} = ExLLM.validate_context(messages, :anthropic,
  model: "claude-3-opus"
)
# Get context window size
{:ok, 100000} = ExLLM.context_window_size(:anthropic, "claude-3-opus")
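These functions compose into a simple guard: validate first, and fall back to trimming with prepare_messages/3 when the conversation is too large. A sketch using only the calls above (the exact error shape on overflow is an assumption):
fit_messages = fn messages ->
  case ExLLM.validate_context(messages, :openai, model: "gpt-4") do
    {:ok, :valid} ->
      messages

    # Error shape on overflow is assumed for this sketch
    {:error, _reason} ->
      {:ok, trimmed} =
        ExLLM.prepare_messages(messages, :openai,
          model: "gpt-4",
          strategy: :sliding_window
        )

      trimmed
  end
end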
Function Calling
Execute Functions with LLMs
# Define available functions
functions = [
  %{
    name: "get_weather",
    description: "Get weather for a location",
    parameters: %{
      type: "object",
      properties: %{
        location: %{type: "string"}
      }
    }
  }
]
# Chat with function calling
{:ok, response} = ExLLM.chat(:openai, "What's the weather in NYC?",
  functions: functions
)
# Parse and execute function calls
{:ok, calls} = ExLLM.parse_function_calls(response)
{:ok, result} = ExLLM.execute_function(List.first(calls),
  fn "get_weather", %{"location" => loc} ->
    {:ok, "Sunny, 72°F in #{loc}"}
  end
)
# Format result for LLM
formatted = ExLLM.format_function_result("get_weather", result)
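Put together, a full round trip chains these steps: ask, detect the call, execute it, and format the result for a follow-up request. A sketch using only the functions shown above:
{:ok, response} = ExLLM.chat(:openai, "What's the weather in NYC?",
  functions: functions
)

with {:ok, [call | _]} <- ExLLM.parse_function_calls(response),
     {:ok, result} <-
       ExLLM.execute_function(call, fn "get_weather", %{"location" => loc} ->
         {:ok, "Sunny, 72°F in #{loc}"}
       end) do
  # Send the formatted result back in a follow-up request so the
  # model can phrase a final answer for the user
  ExLLM.format_function_result("get_weather", result)
end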
Embeddings
Generate Text Embeddings
# Single text embedding
{:ok, embedding} = ExLLM.embeddings(:openai, "Hello world")
# Multiple texts
{:ok, embeddings} = ExLLM.embeddings(:openai, ["Text 1", "Text 2"])
# Calculate similarity
similarity = ExLLM.cosine_similarity(embedding1, embedding2)
# List embedding models
{:ok, models} = ExLLM.list_embedding_models(:openai)
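A common pattern built on these two functions is nearest-neighbor lookup: embed a small corpus once, embed the query, and rank by cosine_similarity/2. A sketch, assuming the returned embeddings are plain vectors:
docs = ["Elixir is a functional language", "Paris is in France"]

{:ok, doc_vectors} = ExLLM.embeddings(:openai, docs)
{:ok, query_vector} = ExLLM.embeddings(:openai, "Which language is functional?")

# Pick the document most similar to the query
docs
|> Enum.zip(doc_vectors)
|> Enum.max_by(fn {_doc, vec} -> ExLLM.cosine_similarity(query_vector, vec) end)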
Vision and Multimodal
Work with Images
# Load and validate image
{:ok, image_data} = ExLLM.load_image("/path/to/image.jpg",
  max_size: {1024, 1024},
  format: :jpeg
)
# Create vision message
message = ExLLM.vision_message("What's in this image?",
  ["/path/to/image.jpg"]
)
# Check vision support
true = ExLLM.supports_vision?(:openai, "gpt-4-vision")
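Checking support first avoids sending image content to a model that rejects it. A short sketch combining the calls above (passing a message list to chat/3 is an assumption here):
image_path = "/path/to/image.jpg"

if ExLLM.supports_vision?(:openai, "gpt-4-vision") do
  # Build a multimodal message from text plus image paths
  message = ExLLM.vision_message("What's in this image?", [image_path])

  # Passing a message list to chat/3 is assumed in this sketch
  ExLLM.chat(:openai, [message], model: "gpt-4-vision")
else
  {:error, :vision_not_supported}
end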
Provider Information
Query Provider Capabilities
# List all supported providers
providers = ExLLM.supported_providers()
# Get provider capabilities
{:ok, caps} = ExLLM.get_provider_capabilities(:anthropic)
# Check specific capability
true = ExLLM.provider_supports?(:openai, :streaming)
# Check if provider is configured
true = ExLLM.configured?(:openai)
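These predicates compose into runtime provider selection, for example picking the first configured provider that supports streaming:
# Find the first provider that both has credentials configured
# and advertises streaming support
provider =
  ExLLM.supported_providers()
  |> Enum.find(fn p ->
    ExLLM.configured?(p) and ExLLM.provider_supports?(p, :streaming)
  end)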
Streaming Recovery
Handle Stream Interruptions
# Resume an interrupted stream
{:ok, new_stream_id} = ExLLM.resume_stream(old_stream_id,
  fn chunk -> IO.write(chunk.content) end
)
# List recoverable streams
streams = ExLLM.list_recoverable_streams()
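A recovery pass at application startup might walk the recoverable streams and resume each one. A sketch, assuming each list entry is a stream id usable with resume_stream/2:
# Try to resume every recoverable stream (the shape of the list
# entries is assumed here)
for stream_id <- ExLLM.list_recoverable_streams() do
  ExLLM.resume_stream(stream_id, fn chunk -> IO.write(chunk.content) end)
end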
Configuration
ExLLM can be configured through environment variables or application config:
# config/config.exs
config :ex_llm,
  # Provider API keys
  openai_api_key: System.get_env("OPENAI_API_KEY"),
  anthropic_api_key: System.get_env("ANTHROPIC_API_KEY"),

  # Default models
  default_models: %{
    openai: "gpt-4-turbo-preview",
    anthropic: "claude-3-sonnet"
  },

  # Global options
  cache_enabled: true,
  log_level: :info
Error Handling
All API functions return {:ok, result} or {:error, reason} tuples:
case ExLLM.chat(:openai, "Hello") do
  {:ok, response} ->
    IO.puts(response.content)

  {:error, {:api_error, status, message}} ->
    IO.puts("API error #{status}: #{message}")

  {:error, {:rate_limit, retry_after}} ->
    IO.puts("Rate limited, retry after #{retry_after}s")

  {:error, reason} ->
    IO.puts("Error: #{inspect(reason)}")
end
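The tagged error tuples make a retry wrapper straightforward. A sketch that retries only on rate limits, honoring the suggested delay (the helper module is hypothetical, not part of ExLLM):
defmodule MyApp.Retry do
  # Hypothetical helper, not part of ExLLM
  def chat_with_retry(provider, prompt, retries \\ 3) do
    case ExLLM.chat(provider, prompt) do
      {:ok, response} ->
        {:ok, response}

      {:error, {:rate_limit, retry_after}} when retries > 0 ->
        # Wait the suggested number of seconds, then try again
        Process.sleep(retry_after * 1000)
        chat_with_retry(provider, prompt, retries - 1)

      {:error, reason} ->
        {:error, reason}
    end
  end
end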
Advanced Usage
Custom Configuration Provider
# Use a custom configuration provider
{:ok, response} = ExLLM.chat(:openai, "Hello",
  config_provider: MyApp.ConfigProvider
)
Request Options
# All available options for chat requests
{:ok, response} = ExLLM.chat(:openai, "Hello",
  # Model selection
  model: "gpt-4",

  # Generation parameters
  temperature: 0.7,
  max_tokens: 1000,
  top_p: 0.9,
  frequency_penalty: 0.0,
  presence_penalty: 0.0,
  stop: ["\n\n"],

  # Function calling
  functions: [...],
  function_call: "auto",

  # Response format
  response_format: %{type: "json_object"},

  # System message
  system: "You are a helpful assistant",

  # Other options
  user: "user-123",
  seed: 42,
  track_cost: true
)
Best Practices
- Always handle errors: LLM APIs can fail for various reasons
- Use sessions for conversations: Maintains context automatically
- Monitor costs: Use cost tracking functions to avoid surprises
- Validate context size: Ensure messages fit within model limits
- Configure providers properly: Set API keys and default models (a pre-flight sketch combining this with context validation follows this list)
- Use streaming for long responses: Better user experience
- Cache responses when appropriate: Reduce costs and latency
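Several of these practices combine naturally into one pre-flight check, sketched below (the helper module is hypothetical, and passing a message list to chat/3 is an assumption):
defmodule MyApp.Preflight do
  # Hypothetical helper combining several of the practices above
  def chat(provider, messages, opts \\ []) do
    with true <- ExLLM.configured?(provider),
         {:ok, :valid} <- ExLLM.validate_context(messages, provider, opts) do
      # Passing a message list to chat/3 is assumed in this sketch
      ExLLM.chat(provider, messages, opts)
    else
      false -> {:error, :provider_not_configured}
      error -> error
    end
  end
end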
Type Specifications
All return types are defined in ExLLM.Types:
# Main response type
%ExLLM.Types.LLMResponse{
  content: String.t(),
  model: String.t(),
  usage: %{input_tokens: integer(), output_tokens: integer()},
  cost: %{input: float(), output: float(), total: float()},
  finish_reason: String.t() | nil,
  function_call: map() | nil,
  tool_calls: [map()] | nil
}
# Streaming chunk type
%ExLLM.Types.StreamChunk{
  content: String.t(),
  finish_reason: String.t() | nil,
  chunk_index: integer()
}
# Session type
%ExLLM.Types.Session{
  provider: atom(),
  model: String.t(),
  messages: [map()],
  total_tokens: %{input: integer(), output: integer()},
  total_cost: %{input: float(), output: float(), total: float()},
  metadata: map()
}