# Notebook dependencies.
# `Kino.VegaLite` (used by every chart cell) is provided by the separate
# `kino_vega_lite` package rather than by `kino` itself, so it has to be
# listed explicitly or the chart cells fail with an undefined-function error.
Mix.install([
  {:ex_codecs, path: Path.join(__DIR__, "..")},
  {:kino, "~> 0.14"},
  {:kino_vega_lite, "~> 0.1"},
  {:vega_lite, "~> 0.1"}
])

Setup: Test Datasets

# Codecs under comparison, in the order they are iterated below.
codecs = [:lz4, :snappy, :zstd, :bzip2, :blosc2]

# 65536 bytes from the cryptographic random source.
random_data = :crypto.strong_rand_bytes(65536)

# A single sentence repeated 2000 times.
repetitive_text = :binary.copy("The quick brown fox jumps over the lazy dog. ", 2000)

# 5000 query-string-like records joined with "&"; the score field is drawn
# from :rand, so this dataset differs from run to run.
semi_structured =
  Enum.map_join(1..5000, "&", fn n ->
    "id=#{n}&name=user#{rem(n, 100)}&score=#{:rand.uniform(1000)}&active=#{rem(n, 2) == 0}"
  end)

# 8192 little-endian 64-bit floats: 0.125, 0.25, 0.375, ...
float_array =
  1..8192
  |> Enum.map(fn n -> <<(n * 0.125)::float-size(64)-little>> end)
  |> IO.iodata_to_binary()

# Display name => payload, shared by all benchmark cells.
datasets = %{
  "Random bytes" => random_data,
  "Repetitive text" => repetitive_text,
  "Semi-structured" => semi_structured,
  "Float64 array" => float_array
}

# Byte size of each dataset, keyed by its display name.
dataset_sizes = Map.new(datasets, fn {label, payload} -> {label, byte_size(payload)} end)

IO.puts("Dataset sizes:")

# One line per dataset; names are padded to 20 columns so the sizes line up.
Enum.map(dataset_sizes, fn {label, bytes} ->
  IO.puts("  #{String.pad_trailing(label, 20)} #{bytes} bytes")
end)

Compression Ratio Benchmarks

# Encode every dataset with every codec and record how much the payload shrinks.
compression_results =
  for {dataset_name, payload} <- datasets, codec <- codecs do
    # Only Blosc2 receives explicit options; every other codec gets an empty list.
    encode_opts =
      case codec do
        :blosc2 -> [cname: :zstd, clevel: 5, shuffle: :byte]
        _other -> []
      end

    {:ok, encoded} = ExCodecs.encode(codec, payload, encode_opts)

    original_size = byte_size(payload)
    compressed_size = byte_size(encoded)

    %{
      dataset: dataset_name,
      codec: inspect(codec),
      original: original_size,
      compressed: compressed_size,
      # Compressed size as a percentage of the original size.
      ratio_pct: Float.round(100 * compressed_size / original_size, 1),
      # Percentage of the original size that was removed.
      savings_pct: Float.round(100 * (1 - compressed_size / original_size), 1)
    }
  end

Kino.DataTable.new(compression_results)

Compression Ratio Chart

# Bar chart of compressed size (% of original), one facet column per dataset.
#
# The "codec" field holds inspect/1 output (":lz4", ":snappy", ...), so the
# x-axis order must be built the same way. A list of bare names such as "lz4"
# matches no value and the requested order is silently ignored.
codec_order = Enum.map(codecs, &inspect/1)

VegaLite.new(width: 700, height: 350)
|> VegaLite.data_from_values(compression_results)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "codec", type: :nominal, sort: codec_order)
|> VegaLite.encode_field(:y, "ratio_pct", type: :quantitative, title: "Compressed Size (%)", scale: [domain: [0, 110]])
|> VegaLite.encode_field(:color, "codec", type: :nominal)
|> VegaLite.encode_field(:column, "dataset", type: :nominal)
|> Kino.VegaLite.new()

Space Savings Chart

# Bar chart of space saved (%), one facet column per dataset.
#
# The "codec" field holds inspect/1 output (":lz4", ":snappy", ...), so the
# x-axis order must be built the same way. A list of bare names such as "lz4"
# matches no value and the requested order is silently ignored.
codec_order = Enum.map(codecs, &inspect/1)

VegaLite.new(width: 700, height: 350)
|> VegaLite.data_from_values(compression_results)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "codec", type: :nominal, sort: codec_order)
|> VegaLite.encode_field(:y, "savings_pct", type: :quantitative, title: "Space Saved (%)")
|> VegaLite.encode_field(:color, "codec", type: :nominal)
|> VegaLite.encode_field(:column, "dataset", type: :nominal)
|> Kino.VegaLite.new()

Speed Benchmarks

# Number of timed repetitions per codec/dataset pair.
iterations = 20

# Time `iterations` encodes and decodes for every dataset/codec pair.
# Times are reported per call in microseconds; throughput is original bytes
# per microsecond, which is numerically the same as MB/s (10^6 bytes/s).
speed_results = for {dname, data} <- datasets, codec <- codecs do
  opts = if codec == :blosc2, do: [cname: :zstd, clevel: 5, shuffle: :byte], else: []
  {:ok, enc} = ExCodecs.encode(codec, data, opts)

  # Decode once outside the timed region: this fails loudly if the codec cannot
  # decode its own output (instead of timing a fast error path) and also serves
  # as a warm-up, mirroring the untimed encode above.
  {:ok, _decoded} = ExCodecs.decode(codec, enc)

  # Enum.each discards each result, so the timed loop does not also build and
  # hold a list of every encoded/decoded payload.
  {enc_time, _} = :timer.tc(fn ->
    Enum.each(1..iterations, fn _ -> ExCodecs.encode(codec, data, opts) end)
  end)

  {dec_time, _} = :timer.tc(fn ->
    Enum.each(1..iterations, fn _ -> ExCodecs.decode(codec, enc) end)
  end)

  total_bytes = byte_size(data) * iterations

  %{
    dataset: dname,
    codec: inspect(codec),
    encode_time_us: div(enc_time, iterations),
    decode_time_us: div(dec_time, iterations),
    # Clamp elapsed time to 1 µs so a timer reading of 0 cannot divide by zero.
    encode_mbps: Float.round(total_bytes / max(enc_time, 1), 1),
    decode_mbps: Float.round(total_bytes / max(dec_time, 1), 1)
  }
end

Kino.DataTable.new(speed_results)

Encode Speed Chart

# Bar chart of per-call encode time (µs), one facet column per dataset.
#
# The "codec" field holds inspect/1 output (":lz4", ":snappy", ...), so the
# x-axis order must be built the same way. A list of bare names such as "lz4"
# matches no value and the requested order is silently ignored.
codec_order = Enum.map(codecs, &inspect/1)

VegaLite.new(width: 700, height: 350)
|> VegaLite.data_from_values(speed_results)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "codec", type: :nominal, sort: codec_order)
|> VegaLite.encode_field(:y, "encode_time_us", type: :quantitative, title: "Encode Time (µs)")
|> VegaLite.encode_field(:color, "codec", type: :nominal)
|> VegaLite.encode_field(:column, "dataset", type: :nominal)
|> Kino.VegaLite.new()

Decode Speed Chart

# Bar chart of per-call decode time (µs), one facet column per dataset.
#
# The "codec" field holds inspect/1 output (":lz4", ":snappy", ...), so the
# x-axis order must be built the same way. A list of bare names such as "lz4"
# matches no value and the requested order is silently ignored.
codec_order = Enum.map(codecs, &inspect/1)

VegaLite.new(width: 700, height: 350)
|> VegaLite.data_from_values(speed_results)
|> VegaLite.mark(:bar)
|> VegaLite.encode_field(:x, "codec", type: :nominal, sort: codec_order)
|> VegaLite.encode_field(:y, "decode_time_us", type: :quantitative, title: "Decode Time (µs)")
|> VegaLite.encode_field(:color, "codec", type: :nominal)
|> VegaLite.encode_field(:column, "dataset", type: :nominal)
|> Kino.VegaLite.new()

Memory Usage

# Rough memory footprint of one encode + decode of the float array per codec,
# together with the capability flags reported by ExCodecs.codec_info/1.
#
# Two measurements are reported:
#   * heap_growth_words   - change in this process's heap size, in words.
#   * binary_growth_bytes - change in the VM-wide binary allocation, in bytes.
#
# The heap figure alone misses the payloads: binaries larger than 64 bytes are
# reference-counted and stored outside the process heap, and both the encoded
# and decoded buffers here are well above that size. The binary figure is
# VM-wide, so activity in other processes adds noise; treat both as indicative.
# NOTE(review): memory allocated internally by the codec implementation may not
# be visible to either counter - confirm against how ExCodecs allocates.
memory_results = for codec <- codecs do
  opts = if codec == :blosc2, do: [cname: :zstd, clevel: 5, shuffle: :byte], else: []
  {:ok, info} = ExCodecs.codec_info(codec)

  # Collect garbage first so leftovers from the previous codec are not counted
  # against (or released during) this codec's measurement.
  :erlang.garbage_collect()
  {:heap_size, heap_before} = Process.info(self(), :heap_size)
  binary_before = :erlang.memory(:binary)

  {:ok, enc} = ExCodecs.encode(codec, float_array, opts)
  {:ok, _dec} = ExCodecs.decode(codec, enc)

  {:heap_size, heap_after} = Process.info(self(), :heap_size)
  binary_after = :erlang.memory(:binary)

  %{
    codec: inspect(codec),
    category: info.category,
    configurable: info.configurable?,
    streaming: info.streaming?,
    heap_growth_words: heap_after - heap_before,
    binary_growth_bytes: binary_after - binary_before
  }
end

Kino.DataTable.new(memory_results)

Codec Profiles

# Usage profiles: profile name => recommended codecs and a one-line rationale.
profile_data =
  [
    {"Speed King", [:lz4, :snappy],
     "Fastest encode/decode, ideal for hot paths, caching, and real-time systems"},
    {"Balanced", [:zstd],
     "Good ratio with fast decompression. Default choice for most workloads"},
    {"Maximum Ratio", [:bzip2],
     "Smallest output. Accept slower speed for archival and cold storage"},
    {"Numeric Arrays", [:blosc2],
     "Shuffle+compress slashes size of typed data. Threaded for large arrays"}
  ]
  |> Map.new(fn {title, recommended, rationale} ->
    {title, %{best: recommended, why: rationale}}
  end)

# Print each profile as a small heading followed by its codecs and rationale.
Enum.map(profile_data, fn {title, %{best: recommended, why: rationale}} ->
  IO.puts("## #{title}")
  IO.puts("  Codecs: #{inspect(recommended)}")
  IO.puts("  #{rationale}\n")
end)

Interactive Codec Selector

# Kino.Input.select takes `{value, label}` pairs: the first element is what
# Kino.Input.read/1 returns, the second is the text shown in the dropdown.
# The atoms therefore have to come first, otherwise reading the input yields
# the label string and the recommendation `case` below cannot match it.
use_case = Kino.Input.select("Your use case:", [
  speed: "Real-time / low latency",
  balanced: "General purpose",
  ratio: "Maximum compression / archival",
  numeric: "Numerical / scientific data",
  tiny: "Small payloads / low overhead"
])

data_type = Kino.Input.select("Data type:", [
  text: "Text / JSON",
  binary: "Binary blobs",
  array: "Typed arrays (floats, ints)",
  mixed: "Mixed"
])

# Show both dropdowns side by side, then read the currently selected values.
Kino.Layout.grid([use_case, data_type], columns: 2)
use_case_val = Kino.Input.read(use_case)
data_type_val = Kino.Input.read(data_type)

# Maps the selected use case and data type to `{codec, reason}`.
# Clauses are tried top to bottom, so the array-specific rules must stay above
# the catch-all rule for the same use case.
pick_codec = fn
  :speed, _ ->
    {:lz4, "Fastest compression/decompression. Minimal latency overhead."}

  :tiny, _ ->
    {:snappy, "Low overhead even on very small payloads. No configuration needed."}

  :ratio, :array ->
    {:blosc2, "Shuffle+compress gives best ratios on typed arrays."}

  :ratio, _ ->
    {:bzip2, "Highest compression ratio for general data. Slow but compact."}

  :numeric, _ ->
    {:blosc2, "Purpose-built for numerical data with shuffle filters and threading."}

  :balanced, :array ->
    {:blosc2, "Good ratio on typed data with decent speed."}

  :balanced, _ ->
    {:zstd, "Best all-around codec. Configurable from fast (level 1) to compact (level 22)."}
end

recommendation = pick_codec.(use_case_val, data_type_val)

{codec, reason} = recommendation
{:ok, info} = ExCodecs.codec_info(codec)

# Report the choice together with the capability flags from codec_info/1.
[
  "Recommended codec: #{inspect(codec)}",
  "Reason: #{reason}",
  "Configurable: #{info.configurable?}",
  "Streaming: #{info.streaming?}"
]
|> Enum.each(&IO.puts/1)

# Starting options suggested for the chosen codec.
default_opts =
  case codec do
    :snappy -> []
    :lz4 -> [level: 1]
    :zstd -> [level: 3]
    :bzip2 -> [block_size: 9]
    :blosc2 -> [cname: :zstd, clevel: 5, shuffle: :byte]
  end

IO.puts("Suggested options: #{inspect(default_opts)}")

Decision Flowchart

# Plain-text decision path for choosing a codec, printed verbatim.
# The text contains no interpolation or escapes, so the non-interpolating
# ~S heredoc yields exactly the same string.
flowchart = ~S"""
When choosing a codec, follow this decision path:

1. Is your data typed numerical arrays?
   YES → Use Blosc2 (with appropriate shuffle and typesize)
   NO  → Continue

2. Is latency critical (hot path, real-time)?
   YES → Use LZ4 (fastest) or Snappy (low overhead)
   NO  → Continue

3. Is storage cost the primary concern?
   YES → Use Bzip2 (best ratio) or Zstd with high level
   NO  → Continue

4. Default choice:
   → Use Zstd (level 3)
   → Good ratio, fast decompression, configurable
"""

IO.puts(flowchart)

Codec Feature Matrix

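| Feature | LZ4 | Snappy | Zstd | Bzip2 | Blosc2 |
| --- | --- | --- | --- | --- | --- |
| Speed | Very Fast | Very Fast | Fast | Slow | Medium |
| Ratio | Low | Low | High | Very High | High (arrays) |
| Configurable | Level 1–16 | No | Level 1–22 | Block 1–9 | Many options |
| Streaming | No | No | Yes | No | Yes |
| Best For | Hot paths | Short data | General | Archival | Arrays |
| Shuffle | | | | | Byte/Bit |
| Multi-thread | No | No | No | No | Yes |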

Next Steps