Deepgram Elixir SDK
Unofficial Elixir SDK for Deepgram. Power your apps with world-class speech and Language AI models.
Features
- Speech-to-Text: Convert audio to text with high accuracy
- Text-to-Speech: Generate natural-sounding speech from text
- Text Intelligence: Extract insights from text with sentiment analysis, topic detection, and more
- AI Voice Agent: Build conversational AI applications
- Management API: Manage projects, API keys, and usage
- WebSocket Support: Real-time streaming for live applications
- Async/Await: Full support for asynchronous operations
Installation
Add `deepgram` to your list of dependencies in `mix.exs`:
def deps do
[
{:deepgram, "~> 0.1"}
]
end
Configuration
Set your Deepgram API key as an environment variable:
export DEEPGRAM_API_KEY="your-deepgram-api-key"
Or configure it in your application:
config :deepgram,
api_key: System.get_env("DEEPGRAM_API_KEY")
Quick Start
# Create a client
client = Deepgram.new(api_key: "your-api-key")
# Transcribe audio from URL
{:ok, result} = Deepgram.Listen.transcribe_url(client, %{url: "https://example.com/audio.wav"})
# Synthesize speech
{:ok, audio_data} = Deepgram.Speak.synthesize(client, %{text: "Hello, world!"})
# Analyze text
{:ok, analysis} = Deepgram.Read.analyze(client, %{text: "I love this product!"}, %{sentiment: true})
Documentation
Speech-to-Text
Prerecorded Audio
# From URL
{:ok, result} = Deepgram.Listen.transcribe_url(client,
%{url: "https://example.com/audio.wav"},
%{model: "nova-2", punctuate: true, diarize: true}
)
# From file
{:ok, audio_data} = File.read("path/to/audio.wav")
{:ok, result} = Deepgram.Listen.transcribe_file(client, audio_data, %{model: "nova-2"})
# With callback (async)
{:ok, result} = Deepgram.Listen.transcribe_url_callback(client,
%{url: "https://example.com/audio.wav"},
"https://your-callback-url.com/webhook",
%{model: "nova-2"}
)
Live Audio Streaming
# Start WebSocket connection
{:ok, websocket} = Deepgram.Listen.live_transcription(client, %{
model: "nova-2",
interim_results: true,
punctuate: true
})
# Send audio data
Deepgram.Listen.WebSocket.send_audio(websocket, audio_chunk)
# Handle messages
receive do
{:deepgram_result, result} ->
IO.puts("Transcript: #{result["channel"]["alternatives"] |> hd |> Map.get("transcript")}")
{:deepgram_error, error} ->
IO.puts("Error: #{inspect(error)}")
end
Text-to-Speech
Basic Synthesis
# Generate audio
{:ok, audio_data} = Deepgram.Speak.synthesize(client,
%{text: "Hello, world!"},
%{model: "aura-2-thalia-en", encoding: "linear16"}
)
# Save to file
{:ok, response} = Deepgram.Speak.save_to_file(client, "output.wav",
%{text: "Hello, world!"},
%{model: "aura-2-thalia-en"}
)
Live Speech Synthesis
# Start WebSocket connection
{:ok, websocket} = Deepgram.Speak.live_synthesis(client, %{
model: "aura-2-thalia-en",
encoding: "linear16",
sample_rate: 16000
})
# Send text to synthesize
Deepgram.Speak.WebSocket.send_text(websocket, "Hello, this is streaming text-to-speech!")
# Handle audio data
receive do
{:deepgram_audio, audio_data} ->
# Play or save audio data
File.write("output.wav", audio_data, [:append])
end
Text Intelligence
# Sentiment analysis
{:ok, result} = Deepgram.Read.analyze_sentiment(client, %{text: "I love this product!"})
# Topic detection
{:ok, result} = Deepgram.Read.analyze_topics(client, %{text: "Let's discuss machine learning."})
# Intent recognition
{:ok, result} = Deepgram.Read.analyze_intents(client, %{text: "I want to cancel my subscription."})
# Text summarization
{:ok, result} = Deepgram.Read.summarize(client, %{text: "Long text to summarize..."})
# Combined analysis
{:ok, result} = Deepgram.Read.analyze(client, %{text: "Analyze this text"}, %{
sentiment: true,
topics: true,
intents: true,
summarize: true
})
AI Voice Agent
# Configure agent
settings = %{
agent: %{
listen: %{
model: "nova-2",
language: "en",
smart_format: true,
encoding: "linear16",
sample_rate: 16000,
channels: 1,
provider: %{type: "deepgram"}
},
think: %{
provider: %{type: "open_ai", model: "gpt-4"},
instructions: "You are a helpful assistant."
},
speak: %{
model: "aura-2-thalia-en",
encoding: "linear16",
provider: %{type: "deepgram"}
}
},
greeting: "Hello! How can I help you today?"
}
# Start agent session
{:ok, agent} = Deepgram.Agent.start_session(client, settings)
# Send audio to agent
Deepgram.Agent.send_audio(agent, audio_chunk)
# Handle agent responses
receive do
{:deepgram_audio, audio_data} ->
# Play agent's speech
play_audio(audio_data)
{:deepgram_function_call_request, request} ->
# Handle function calls
result = handle_function_call(request)
Deepgram.Agent.respond_to_function_call(agent, request["function_call_id"], result)
end
Management API
# Get projects
{:ok, projects} = Deepgram.Manage.get_projects(client)
# Create API key
{:ok, key} = Deepgram.Manage.create_key(client, project_id, %{
comment: "My API key",
scopes: ["usage:read", "usage:write"]
})
# Get usage
{:ok, usage} = Deepgram.Manage.get_usage_summary(client, project_id)
# Get balances
{:ok, balances} = Deepgram.Manage.get_balances(client, project_id)
Error Handling
The SDK uses tagged tuples for error handling:
case Deepgram.Listen.transcribe_url(client, %{url: "invalid-url"}) do
{:ok, result} ->
# Handle success
    IO.puts("Transcription: #{inspect(result)}")
{:error, %Deepgram.Error.ApiError{status_code: 400, message: message}} ->
# Handle API error
IO.puts("API Error: #{message}")
{:error, %Deepgram.Error.HttpError{reason: reason}} ->
# Handle HTTP error
IO.puts("HTTP Error: #{reason}")
{:error, error} ->
# Handle other errors
IO.puts("Error: #{inspect(error)}")
end
Examples
This SDK includes comprehensive examples to help you get started with each feature:
Interactive Livebook Examples
Interactive notebooks for exploring SDK features:
- Speech-to-Text (Listen) Examples
- Text-to-Speech (Speak) Examples
- Text Intelligence (Read) Examples
- AI Voice Agent Examples
Script Examples
Standalone scripts for quick implementation are also provided.
For more examples and documentation, see the examples directory.
Development
# Install dependencies
mix deps.get
# Run tests
mix test
# Generate documentation
mix docs
# Run linter
mix credo
# Run type checker
mix dialyzer
# Check for compilation errors
mix compile --warnings-as-errors
Contributing
- Fork the repository
- Create your feature branch (`git checkout -b feature/amazing-feature`)
- Commit your changes (`git commit -m 'Add some amazing feature'`)
- Push to the branch (`git push origin feature/amazing-feature`)
- Open a Pull Request
License
This project is licensed under the MIT License - see the LICENSE file for details.