# AxiomAI
A unified Elixir client for multiple AI providers.
## Installation

```elixir
def deps do
  [
    {:axiom_ai, "~> 0.1.0"}
  ]
end
```
## Quick Start

```elixir
# Vertex AI (Google Cloud)
client = AxiomAi.new(:vertex_ai, %{project_id: "your-gcp-project"})
{:ok, response} = AxiomAi.chat(client, "Hello!")

# OpenAI
client = AxiomAi.new(:openai, %{api_key: "your-openai-key"})
{:ok, response} = AxiomAi.chat(client, "Hello!")

# Anthropic Claude
client = AxiomAi.new(:anthropic, %{api_key: "your-anthropic-key"})
{:ok, response} = AxiomAi.chat(client, "Hello!")

# Local AI models
client = AxiomAi.new(:local, %{predefined_model: "qwen2.5-0.5b"})
{:ok, response} = AxiomAi.chat(client, "Hello!")

# Whisper speech-to-text
client = AxiomAi.new(:local, %{predefined_model: "whisper-large-v3-turbo"})
{:ok, response} = AxiomAi.chat(client, "/path/to/audio.wav|Transcribe this audio")
```
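Chat calls return tagged tuples, so failures can be pattern-matched rather than crashed on. A minimal sketch (the `{:error, reason}` shape is assumed from the library's tagged-tuple style):

```elixir
# Handle both outcomes instead of asserting {:ok, _};
# the {:error, reason} shape is an assumption, not a documented contract
client = AxiomAi.new(:openai, %{api_key: "your-openai-key"})

case AxiomAi.chat(client, "Hello!") do
  {:ok, response} -> IO.inspect(response, label: "response")
  {:error, reason} -> IO.puts("Request failed: #{inspect(reason)}")
end
```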
## Supported Providers

| Provider | Authentication | Example |
|---|---|---|
| Vertex AI | Service Account, ADC | `AxiomAi.new(:vertex_ai, %{project_id: "my-project"})` |
| OpenAI | API Key | `AxiomAi.new(:openai, %{api_key: "sk-..."})` |
| Anthropic | API Key | `AxiomAi.new(:anthropic, %{api_key: "sk-..."})` |
| DeepSeek | API Key | `AxiomAi.new(:deepseek, %{api_key: "sk-..."})` |
| AWS Bedrock | AWS Credentials | `AxiomAi.new(:bedrock, %{model: "anthropic.claude-3-haiku-20240307-v1:0"})` |
| Local AI | None | `AxiomAi.new(:local, %{predefined_model: "qwen2.5-0.5b"})` |
## Predefined Local Models

```elixir
# List available models
models = AxiomAi.LocalModels.list_models()
IO.inspect(models)
```

**Available Models:**

**Text Generation:**
- `qwen2.5-0.5b` - Qwen2.5 0.5B
- `qwen2.5-1.5b` - Qwen2.5 1.5B
- `qwen2.5-3b` - Qwen2.5 3B
- `llama3-8b` - Llama 3 8B
- `mistral-7b` - Mistral 7B
- `codellama-7b` - Code Llama 7B

**Speech-to-Text:**
- `whisper-large-v3` - Whisper Large v3
- `whisper-large-v3-turbo` - Whisper Large v3 Turbo

**OCR:**
- `nanonets-ocr-s` - Nanonets OCR Small
## Custom Local Providers
AxiomAI's local provider supports several execution methods beyond the predefined models, letting you run custom AI models or connect to OpenAI-compatible HTTP endpoints.
### 1. HTTP Endpoints (OpenAI-Compatible APIs)
Connect to any OpenAI-compatible API server like Ollama, vLLM, or custom model servers:
```elixir
# OpenAI-compatible API (vLLM, FastAPI, etc.)
client = AxiomAi.new(:local, %{
  endpoint: "http://localhost:8000",
  api_format: :openai,          # :openai or :ollama
  model: "meta-llama/Llama-2-7b-hf",
  api_key: "optional-api-key",  # if required by your server
  temperature: 0.7,
  max_tokens: 1024
})

{:ok, response} = AxiomAi.chat(client, "Hello, how are you?")

# Ollama server
ollama_client = AxiomAi.new(:local, %{
  endpoint: "http://localhost:11434",
  api_format: :ollama,
  model: "llama2:7b",
  temperature: 0.8,
  max_tokens: 2048
})

{:ok, response} = AxiomAi.chat(ollama_client, "Explain quantum computing")
```
### 2. Direct Python Integration
Run AI models directly in Python using either script files or embedded code:
#### Option A: Python Script Files
```elixir
# Using an external Python script
client = AxiomAi.new(:local, %{
  python_script: """
  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import json
  import sys

  def main():
      input_data = json.loads(sys.argv[1])
      model_path = input_data['model_path']
      prompt = input_data['prompt']
      max_tokens = input_data.get('max_tokens', 1024)
      temperature = input_data.get('temperature', 0.7)

      # Load model and tokenizer
      tokenizer = AutoTokenizer.from_pretrained(model_path)
      model = AutoModelForCausalLM.from_pretrained(
          model_path,
          torch_dtype=torch.float16,
          device_map="auto"
      )

      # Generate response
      inputs = tokenizer(prompt, return_tensors="pt")
      with torch.no_grad():
          outputs = model.generate(
              **inputs,
              max_new_tokens=max_tokens,
              temperature=temperature,
              do_sample=True,
              pad_token_id=tokenizer.eos_token_id
          )

      response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
      print(json.dumps({"response": response}))

  if __name__ == "__main__":
      main()
  """,
  model_path: "microsoft/DialoGPT-medium",
  temperature: 0.7,
  max_tokens: 512
})

{:ok, response} = AxiomAi.chat(client, "What is machine learning?")
```
#### Option B: Embedded Python Code (Pythonx)
```elixir
# Using embedded Python with automatic dependency management
client = AxiomAi.new(:local, %{
  python_deps: """
  [project]
  name = "custom_inference"
  version = "0.1.0"
  requires-python = ">=3.8"
  dependencies = [
    "torch >= 2.0.0",
    "transformers >= 4.35.0",
    "accelerate >= 0.20.0"
  ]
  """,
  python_code: """
  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM

  # Global variables for model caching
  _model = None
  _tokenizer = None
  _current_model_path = None

  def load_model(model_path):
      global _model, _tokenizer, _current_model_path
      if _current_model_path != model_path:
          _tokenizer = AutoTokenizer.from_pretrained(model_path)
          _model = AutoModelForCausalLM.from_pretrained(
              model_path,
              torch_dtype=torch.float16,
              device_map="auto"
          )
          _current_model_path = model_path
      return _tokenizer, _model

  def generate_response(model_path, prompt, max_tokens=1024, temperature=0.7):
      tokenizer, model = load_model(model_path)
      inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
      with torch.no_grad():
          generated_ids = model.generate(
              **inputs,
              max_new_tokens=max_tokens,
              temperature=temperature,
              do_sample=True,
              pad_token_id=tokenizer.eos_token_id
          )
      response = tokenizer.decode(
          generated_ids[0][inputs.input_ids.shape[1]:],
          skip_special_tokens=True
      )
      return response
  """,
  model_path: "gpt2-medium",
  temperature: 0.8,
  max_tokens: 256
})

{:ok, response} = AxiomAi.chat(client, "Write a short story about AI")
```
### 3. Vision Models
Run multimodal models that can process both text and images:
```elixir
# Vision-language model
vision_client = AxiomAi.new(:local, %{
  python_deps: """
  [project]
  name = "vision_inference"
  requires-python = ">=3.8"
  dependencies = [
    "torch >= 2.0.0",
    "transformers >= 4.35.0",
    "pillow >= 9.0.0"
  ]
  """,
  python_code: """
  import torch
  from transformers import AutoTokenizer, AutoModelForCausalLM, AutoProcessor
  from PIL import Image

  _model = None
  _processor = None
  _current_model_path = None

  def load_model(model_path):
      global _model, _processor, _current_model_path
      if _current_model_path != model_path:
          _processor = AutoProcessor.from_pretrained(model_path)
          _model = AutoModelForCausalLM.from_pretrained(
              model_path,
              torch_dtype=torch.float32,
              device_map="cpu"
          )
          _current_model_path = model_path
      return _processor, _model

  def generate_response(model_path, prompt, max_tokens=512, temperature=0.7):
      processor, model = load_model(model_path)

      # Handle image+text input: "image_path|describe this image"
      if "|" in prompt:
          image_path, text_prompt = prompt.split("|", 1)
          image = Image.open(image_path.strip()).convert('RGB')
          inputs = processor(text=text_prompt.strip(), images=image, return_tensors="pt")
      else:
          inputs = processor(text=prompt, return_tensors="pt")

      with torch.no_grad():
          generated_ids = model.generate(
              **inputs,
              max_new_tokens=max_tokens,
              temperature=temperature,
              do_sample=False
          )

      response = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
      return response
  """,
  model_path: "microsoft/kosmos-2-patch14-224",
  temperature: 0.1,
  max_tokens: 256
})

# Process an image with a text prompt
{:ok, response} = AxiomAi.chat(vision_client, "/path/to/image.jpg|What do you see in this image?")
```
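Since image inputs are plain `"path|prompt"` strings, batching over a directory is ordinary Elixir. A small sketch using only the standard library (`File.ls!/1`, `Path.join/2`), assuming the response is a printable string:

```elixir
# Describe every image in a directory via the "path|prompt" convention above
image_dir = "/path/to/images"

image_dir
|> File.ls!()
|> Enum.filter(&String.ends_with?(&1, [".jpg", ".png"]))
|> Enum.each(fn file ->
  image = Path.join(image_dir, file)
  {:ok, description} = AxiomAi.chat(vision_client, image <> "|Describe this image")
  IO.puts("#{file}: #{description}")
end)
```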
### 4. Speech-to-Text Models
Transcribe audio files using speech recognition models:
```elixir
# Whisper model for speech transcription
speech_client = AxiomAi.new(:local, %{
  python_deps: """
  [project]
  name = "speech_inference"
  requires-python = ">=3.8"
  dependencies = [
    "torch >= 2.0.0",
    "transformers >= 4.35.0",
    "librosa >= 0.10.0"
  ]
  """,
  python_code: """
  import torch
  from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
  import librosa

  _model = None
  _processor = None
  _current_model_path = None

  def load_model(model_path):
      global _model, _processor, _current_model_path
      if _current_model_path != model_path:
          _processor = AutoProcessor.from_pretrained(model_path)
          _model = AutoModelForSpeechSeq2Seq.from_pretrained(
              model_path,
              torch_dtype=torch.float16,
              device_map="auto"
          )
          _current_model_path = model_path
      return _processor, _model

  def generate_response(model_path, prompt, max_tokens=448, temperature=0.0):
      processor, model = load_model(model_path)

      # Handle audio input: "audio_path|transcription task"
      if "|" in prompt:
          audio_path = prompt.split("|")[0].strip()
          audio, _ = librosa.load(audio_path, sr=16000)
          inputs = processor(audio, sampling_rate=16000, return_tensors="pt")

          with torch.no_grad():
              predicted_ids = model.generate(
                  inputs["input_features"],
                  max_new_tokens=max_tokens,
                  temperature=temperature,
                  do_sample=False
              )

          transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
          return transcription
      else:
          return "Error: Please provide audio file path"
  """,
  model_path: "openai/whisper-base",
  temperature: 0.0,
  max_tokens: 448
})

# Transcribe an audio file
{:ok, response} = AxiomAi.chat(speech_client, "/path/to/audio.wav|Transcribe this audio")
```
### 5. Using Templates for Quick Setup
AxiomAI provides built-in templates for common model types:
```elixir
# Use built-in templates with overrides
alias AxiomAi.LocalModels.Templates

# Text generation template
text_config = Templates.create_from_template(:pythonx_text, %{
  model_path: "meta-llama/Llama-2-7b-chat-hf",
  temperature: 0.9,
  max_tokens: 2048
})

client = AxiomAi.new(:local, text_config)

# Vision template
vision_config = Templates.create_from_template(:pythonx_vision, %{
  model_path: "microsoft/kosmos-2-patch14-224"
})

vision_client = AxiomAi.new(:local, vision_config)

# HTTP endpoint templates
ollama_config = Templates.create_from_template(:http_ollama, %{
  endpoint: "http://localhost:11434",
  model: "llama2:13b"
})

ollama_client = AxiomAi.new(:local, ollama_config)

# List available templates
IO.inspect(Templates.list_templates())
# [:pythonx_text, :pythonx_vision, :pythonx_speech, :http_openai, :http_ollama, :custom]
```
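Template output is an ordinary config map, so it can be adjusted further with standard `Map` functions before creating the client:

```elixir
# Start from a template, then override a single option with Map.put/3
config =
  Templates.create_from_template(:pythonx_text, %{model_path: "gpt2-medium"})
  |> Map.put(:max_tokens, 512)

client = AxiomAi.new(:local, config)
```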
## Local Provider Configuration Options
All local provider configurations support these common options:
```elixir
%{
  # Execution type (automatically detected):
  predefined_model: "qwen2.5-0.5b",     # use a predefined model
  endpoint: "http://localhost:8000",    # HTTP endpoint
  python_script: "script content...",   # Python script execution
  python_code: "code content...",       # embedded Python code

  # Model parameters:
  model_path: "path/to/model",          # model identifier or path
  model: "model-name",                  # model name for HTTP APIs
  temperature: 0.7,                     # sampling temperature (0.0-2.0)
  max_tokens: 1024,                     # maximum tokens to generate

  # HTTP-specific:
  api_format: :openai,                  # :openai or :ollama
  api_key: "optional-key",              # API key if required

  # Python-specific:
  python_deps: "pyproject.toml content" # Python dependencies (Pythonx only)
}
```
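As an illustration, here is a predefined model combined with overridden sampling parameters (this assumes, per the "common options" note above, that these options also apply to predefined models):

```elixir
# Predefined model plus parameter overrides (assumed to be honored,
# based on the common options listed above)
client = AxiomAi.new(:local, %{
  predefined_model: "qwen2.5-0.5b",
  temperature: 0.2,
  max_tokens: 256
})
```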
## Requirements
- **HTTP Endpoints**: no additional requirements
- **Python Scripts**: Python 3.8+ with the required packages installed
- **Pythonx Integration**: the Elixir `pythonx` package manages the Python environment automatically (see the dependency sketch below)
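If you take the Pythonx route and it is not already pulled in transitively, you would add `pythonx` to your own dependency list; the version constraint below is illustrative, so check hex.pm for the current release:

```elixir
def deps do
  [
    {:axiom_ai, "~> 0.1.0"},
    # Illustrative version constraint; confirm the latest release on hex.pm
    {:pythonx, "~> 0.4"}
  ]
end
```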
## Authentication
### Vertex AI
```bash
# Local development
gcloud auth application-default login

# Production
export GOOGLE_APPLICATION_CREDENTIALS="/path/to/service-account.json"
```
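To sanity-check that ADC is configured, you can ask `gcloud` to mint a token directly:

```bash
# Prints a short-lived OAuth 2.0 access token if ADC is set up correctly
gcloud auth application-default print-access-token
```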
### Environment Variables
```bash
# .env file
GOOGLE_CLOUD_PROJECT=your-project-id
OPENAI_API_KEY=your-openai-key
ANTHROPIC_API_KEY=your-anthropic-key
DEEPSEEK_API_KEY=your-deepseek-key
AWS_ACCESS_KEY_ID=your-aws-key
AWS_SECRET_ACCESS_KEY=your-aws-secret
```
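These variables can then be read at client construction time with the Elixir standard library; `System.fetch_env!/1` fails fast when a key is missing:

```elixir
# Build a client from the environment; raises at startup if the key is unset
client = AxiomAi.new(:openai, %{api_key: System.fetch_env!("OPENAI_API_KEY")})
```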
## Vertex AI Configuration Options
```elixir
client = AxiomAi.new(:vertex_ai, %{
  project_id: "your-project",
  model: "gemini-1.5-pro", # optional, default: "gemini-1.5-pro"
  region: "us-central1",   # optional, default: "us-central1"
  temperature: 0.7,        # optional, default: 0.7
  max_tokens: 1000,        # optional, default: 65536 for chat, 1024 for completion
  top_k: 40,               # optional, default: 40
  top_p: 0.95              # optional, default: 0.95
})
```
## Streaming Support
AxiomAI supports streaming responses for real-time text generation; currently this is implemented only for Vertex AI:
```elixir
# Simple streaming
client = AxiomAi.new(:vertex_ai, %{project_id: "your-project"})
{:ok, stream} = AxiomAi.stream(client, "Tell me a story")

# Process the stream
stream
|> Enum.each(fn
  {:chunk, chunk} -> IO.write(chunk)
  {:status, code} -> IO.puts("Status: #{code}")
  {:headers, headers} -> IO.inspect(headers)
  {:error, reason} -> IO.puts("Error: #{inspect(reason)}")
end)

# Streaming with conversation history
{:ok, stream} = AxiomAi.stream(client, "You are a helpful assistant", [], "Hello!")
```
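To accumulate the streamed text instead of printing it, filter for `{:chunk, _}` tuples and join them; a sketch based on the tuple shapes shown above:

```elixir
# Reduce the stream to a single string, ignoring status/header events
full_text =
  stream
  |> Enum.filter(&match?({:chunk, _}, &1))
  |> Enum.map_join(fn {:chunk, chunk} -> chunk end)
```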
**Streaming Status:**

- ✅ **Vertex AI**: Full streaming support
- ❌ **OpenAI**: Not implemented yet
- ❌ **Anthropic**: Not implemented yet
- ❌ **DeepSeek**: Not implemented yet
- ❌ **Bedrock**: Not implemented yet
- ❌ **Local**: Not implemented yet
## Credentials Configuration
### Vertex AI Service Account Setup
#### Method 1: Service Account File Path
```elixir
config = %{
  project_id: "your-gcp-project",
  service_account_path: "/path/to/service-account.json",
  model: "gemini-1.5-pro",
  region: "us-central1"
}

client = AxiomAi.new(:vertex_ai, config)
```
#### Method 2: Service Account Key Data
```elixir
# Load credentials from file
{:ok, creds_json} = File.read("/path/to/service-account.json")
{:ok, creds_map} = Jason.decode(creds_json)

config = %{
  project_id: "your-gcp-project",
  service_account_key: creds_map,
  model: "gemini-1.5-pro",
  region: "us-central1"
}

client = AxiomAi.new(:vertex_ai, config)
```
#### Method 3: Direct Access Token
```elixir
config = %{
  project_id: "your-gcp-project",
  access_token: "ya29.your-access-token",
  model: "gemini-1.5-pro",
  region: "us-central1"
}

client = AxiomAi.new(:vertex_ai, config)
```
#### Method 4: Application Default Credentials (ADC)
```elixir
# Automatically detects the environment and uses the appropriate method:
# - Cloud Run/GCE: uses the metadata service
# - Local: uses the gcloud CLI
config = %{
  project_id: "your-gcp-project",
  model: "gemini-1.5-pro",
  region: "us-central1"
}

client = AxiomAi.new(:vertex_ai, config)
```
### Cloud Run Deployment
✅ **Recommended**: Use the default service account
```elixir
# Cloud Run automatically provides credentials via the metadata service
config = %{
  project_id: "your-gcp-project",
  model: "gemini-1.5-pro"
}
```
The library automatically detects the Cloud Run environment and uses the metadata service for authentication; no additional configuration is needed.
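For reference, ADC on Cloud Run and GCE obtains tokens from the instance metadata server; from inside a running container you can inspect the standard token endpoint yourself:

```bash
# The standard GCE/Cloud Run metadata token endpoint (the header is required)
curl -H "Metadata-Flavor: Google" \
  "http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token"
```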
**Alternative**: Mount a service account file
```dockerfile
# In your Dockerfile
COPY service-account.json /app/credentials.json
```

```elixir
# In your application
config = %{
  project_id: "your-gcp-project",
  service_account_path: "/app/credentials.json",
  model: "gemini-1.5-pro"
}
```
### Required IAM Permissions
Ensure your service account has the following roles:
- `roles/aiplatform.user` - for Vertex AI API access
- `roles/ml.developer` - for ML model operations (optional)
```bash
# Grant permissions
gcloud projects add-iam-policy-binding your-gcp-project \
  --member="serviceAccount:your-service-account@your-gcp-project.iam.gserviceaccount.com" \
  --role="roles/aiplatform.user"
```
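To confirm the binding took effect, you can filter the project's IAM policy for that service account:

```bash
# Lists the roles currently granted to the service account
gcloud projects get-iam-policy your-gcp-project \
  --flatten="bindings[].members" \
  --filter="bindings.members:serviceAccount:your-service-account@your-gcp-project.iam.gserviceaccount.com" \
  --format="table(bindings.role)"
```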
## License
MIT License