Elixir bindings for zvec, an in-process vector database. Zvex provides type-safe, idiomatic Elixir access to zvec's vector indexing and similarity search through Zig-based NIF bindings.

Features

  • Vector similarity search with HNSW, IVF, and flat index types
  • Multiple vector types -- fp16, fp32, fp64, int4, int8, int16, binary32, binary64, and sparse vectors
  • Distance metrics -- L2, inner product, cosine, and MIPS-L2
  • Quantization -- fp16, int8, and int4 for reduced memory usage
  • Schema-based collections with typed fields and index configuration
  • Document CRUD -- insert, update, upsert, delete, and fetch operations
  • Filtered search with scalar field predicates
  • Telemetry integration for observability
  • Structured errors via Splode

Installation

Add zvex to your dependencies in mix.exs:

def deps do
  [
    {:zvex, "~> 0.1.0"}
  ]
end

zvex ships prebuilt libzvec_c_api binaries for the targets listed below. During mix deps.compile, the matching binary is downloaded from this repository's GitHub Releases and verified via SHA256. Zig is still required, because the NIF itself is compiled locally via Zigler.

Supported prebuilt targets

  • linux-x86_64-gnu (glibc >= 2.35 / Ubuntu 22.04+)
  • linux-aarch64-gnu
  • darwin-aarch64 (Apple Silicon)

Other targets (e.g. Windows, FreeBSD, darwin-x86_64, Alpine/musl) fall through to a source build requiring cmake + a C++ toolchain + the c_src/zvec git submodule.

Environment variables

| Variable | Effect |
|----------|--------|
| ZVEX_BUILD=true | Skip download, build libzvec_c_api from the vendored submodule. |
| ZVEX_BUILD_URL | Override the download prefix (private mirrors, airgapped environments). |

Quick Start

# Initialize the library
Zvex.initialize!()

# Define a collection schema
schema =
  Zvex.Collection.Schema.new("my_collection")
  |> Zvex.Collection.Schema.add_field("embedding", :vector_fp32, dimension: 128, index: :hnsw)
  |> Zvex.Collection.Schema.add_field("title", :string, index: :invert)

# Create and open a collection
{:ok, collection} = Zvex.Collection.create("/tmp/my_collection", schema)

# Build and insert a document
doc =
  Zvex.Document.new()
  |> Zvex.Document.put_pk(1)
  |> Zvex.Document.put("embedding", Zvex.Vector.from_list(:fp32, List.duplicate(0.5, 128)))
  |> Zvex.Document.put("title", "Hello world")

:ok = Zvex.Collection.insert(collection, [doc])

# Query nearest neighbors
results =
  Zvex.Query.new()
  |> Zvex.Query.field("embedding")
  |> Zvex.Query.vector(Zvex.Vector.from_list(:fp32, List.duplicate(0.5, 128)))
  |> Zvex.Query.top_k(10)
  |> Zvex.Query.hnsw(ef: 100)
  |> Zvex.Query.output_fields(["title"])
  |> Zvex.Query.execute!(collection)

# Clean up
Zvex.Collection.close!(collection)
Zvex.Collection.drop!("/tmp/my_collection")
Zvex.shutdown!()

API Overview

Initialization

# Default configuration
Zvex.initialize!()

# Custom configuration
config =
  Zvex.Config.new()
  |> Zvex.Config.memory_limit(1_073_741_824)
  |> Zvex.Config.query_threads(4)
  |> Zvex.Config.optimize_threads(2)
  |> Zvex.Config.log(:console, level: :info)

Zvex.initialize!(config)

Schema Definition

Schemas define the structure and indexing of a collection.

schema =
  Zvex.Collection.Schema.new("products")
  |> Zvex.Collection.Schema.add_field("embedding", :vector_fp32,
    dimension: 768,
    index: :hnsw,
    metric: :cosine,
    m: 16,
    ef_construction: 200
  )
  |> Zvex.Collection.Schema.add_field("name", :string, index: :invert)
  |> Zvex.Collection.Schema.add_field("price", :double)
  |> Zvex.Collection.Schema.add_field("tags", :array_string)

Data Types

| Category | Types |
|----------|-------|
| Dense vectors | vector_fp32, vector_fp16, vector_fp64, vector_int4, vector_int8, vector_int16, vector_binary32, vector_binary64 |
| Sparse vectors | sparse_vector_fp16, sparse_vector_fp32 |
| Scalars | string, int32, int64, uint32, uint64, float, double, bool, binary |
| Arrays | array_string, array_int32, array_int64, array_uint32, array_uint64, array_float, array_double, array_bool, array_binary |

Index Types

| Index | Use Case | Key Options |
|-------|----------|-------------|
| :hnsw | Approximate nearest neighbor search | :metric, :m, :ef_construction, :quantize |
| :ivf | Large-scale partitioned search | :metric, :n_list, :n_iters, :use_soar, :quantize |
| :flat | Exact brute-force search | :metric, :quantize |
| :invert | Scalar field filtering | :enable_range_opt, :enable_wildcard |

Collections

# Create
{:ok, collection} = Zvex.Collection.create("/path/to/data", schema)

# Open existing
{:ok, collection} = Zvex.Collection.open("/path/to/data", schema)

# Open options
{:ok, collection} = Zvex.Collection.open("/path/to/data", schema,
  mmap: true,
  read_only: true,
  max_buffer_size: 67_108_864
)

# Maintenance
Zvex.Collection.flush(collection)
Zvex.Collection.optimize(collection)

# Introspection
{:ok, stats} = Zvex.Collection.stats(collection)
# => %Zvex.Collection.Stats{doc_count: 1000, indexes: [%{name: "embedding", completeness: 1.0}]}

# DDL operations
Zvex.Collection.create_index(collection, "new_field", index_params)
Zvex.Collection.drop_index(collection, "old_field")
Zvex.Collection.add_column(collection, "metadata", :string, "default_value")
Zvex.Collection.drop_column(collection, "metadata")

Documents

# Build documents
doc =
  Zvex.Document.new()
  |> Zvex.Document.put_pk(42)
  |> Zvex.Document.put("embedding", Zvex.Vector.from_list(:fp32, embedding_data))
  |> Zvex.Document.put("name", "Product A")
  |> Zvex.Document.put("price", 29.99)

# Build from a map (requires schema for type resolution)
doc = Zvex.Document.from_map(%{"pk" => 42, "name" => "Product A", "price" => 29.99}, schema)

# CRUD
Zvex.Collection.insert(collection, [doc1, doc2])
Zvex.Collection.update(collection, [updated_doc])
Zvex.Collection.upsert(collection, [doc])
Zvex.Collection.delete(collection, [42, 43])
Zvex.Collection.delete_by_filter(collection, "name = 'Product A'")

{:ok, docs} = Zvex.Collection.fetch(collection, [42])

Vectors

# Dense vectors
vec = Zvex.Vector.from_list(:fp32, [1.0, 2.0, 3.0])
vec = Zvex.Vector.from_binary(:fp16, binary_data)
list = Zvex.Vector.to_list(vec)
dim = Zvex.Vector.dimension(vec)

# Sparse vectors
vec = Zvex.Vector.from_sparse(:sparse_fp32, [0, 5, 10], [1.0, 0.5, 0.3])
{indices, values} = Zvex.Vector.to_sparse(vec)
nnz = Zvex.Vector.nnz(vec)

Queries

results =
  Zvex.Query.new()
  |> Zvex.Query.field("embedding")
  |> Zvex.Query.vector(query_vector)
  |> Zvex.Query.top_k(10)
  |> Zvex.Query.filter("price < 50.0")
  |> Zvex.Query.output_fields(["name", "price"])
  |> Zvex.Query.include_vector(true)
  |> Zvex.Query.hnsw(ef: 200)
  |> Zvex.Query.execute!(collection)

for result <- results do
  IO.puts("pk=#{result.pk} score=#{result.score} name=#{result.fields["name"]}")
end

Search algorithm options:

# HNSW - approximate, fast
|> Zvex.Query.hnsw(ef: 200)

# IVF - partitioned search
|> Zvex.Query.ivf(n_probe: 16)

# Flat - exact brute-force
|> Zvex.Query.flat()

Error Handling

All fallible functions come in two forms: fun/n returns {:ok, result} or {:error, error}, and fun!/n raises on failure.

Errors are structured via Splode into classes:

| Class | Errors |
|-------|--------|
| Invalid | Argument, FailedPrecondition |
| NotFound | NotFound |
| Conflict | AlreadyExists |
| Unavailable | PermissionDenied, ResourceExhausted, Unavailable, NotSupported |
| Internal | InternalError |
| Unknown | Unknown |

Benchmarks

Run benchmarks with mix aliases:

mix bench.vector       # Vector packing/unpacking
mix bench.document     # Document creation and serialization
mix bench.collection   # Insert, upsert, delete, fetch
mix bench.query        # Vector search performance
mix bench.all          # Everything

Development

# Run tests
mix test

# Run quality checks
mix check

# Generate docs
mix docs

License

See LICENSE for details.