erllama_model_backend behaviour (erllama v0.3.0)

View Source

Behaviour describing the operations the erllama_model gen_statem needs from a backing inference engine.

Two backends have shipped since v0.2:

erllama_model_stub — deterministic phash2-based stubs; used by tests that don't have a GGUF on disk.

erllama_model_llama — real llama.cpp via the NIF.

Future backends (mock for fault injection, remote for distributed inference, etc.) can plug in via this same surface.

Summary

Types

chat_message()

-type chat_message() :: #{role := binary(), content := binary() | [map()]}.

chat_request()

-type chat_request() ::
          #{messages := [chat_message()],
            system => binary() | undefined,
            tools => [chat_tool()] | undefined}.

chat_tool()

-type chat_tool() :: #{name := binary(), description => binary(), schema => map()}.

sampler_opts()

-type sampler_opts() ::
          #{grammar => binary(),
            repetition_penalty => float(),
            top_k => non_neg_integer(),
            top_p => float(),
            min_p => float(),
            temperature => float(),
            seed => non_neg_integer()}.

sampler_ref()

-type sampler_ref() :: term().

seq_id()

-type seq_id() :: non_neg_integer().

state()

-type state() :: term().

step_op()

-type step_op() :: {prefill, [erllama_nif:token_id()]} | {decode, sampler_ref()}.

step_result()

-type step_result() ::
          prefilled |
          {token, erllama_nif:token_id(), 0 | 1} |
          {thinking_token, erllama_nif:token_id()} |
          thinking_end.

Callbacks

apply_adapters/2

(optional)
-callback apply_adapters(state(), [{term(), float()}]) -> {ok, state()} | {error, term()}.

apply_chat_template/2

(optional)
-callback apply_chat_template(state(), Request :: chat_request()) ->
                                 {ok, [erllama_nif:token_id()]} | {error, term()}.

clear_sampler/1

(optional)
-callback clear_sampler(state()) -> {ok, state()} | {error, term()}.

configure_sampler/2

(optional)
-callback configure_sampler(state(), sampler_opts()) -> {ok, state()} | {error, term()}.

decode_one/2

-callback decode_one(state(), ContextTokens :: [erllama_nif:token_id()]) ->
                        {ok, erllama_nif:token_id()} | {eog, erllama_nif:token_id()} | {error, term()}.

detokenize/2

-callback detokenize(state(), [erllama_nif:token_id()]) -> binary() | {error, term()}.

embed/2

(optional)
-callback embed(state(), [erllama_nif:token_id()]) -> {ok, [float()]} | {error, term()}.

extra_metadata/1

(optional)
-callback extra_metadata(state()) ->
                            #{model_size_bytes => non_neg_integer(),
                              total_layers => non_neg_integer(),
                              n_gpu_layers => integer()}.

init/1

-callback init(Config :: map()) -> {ok, state()} | {error, term()}.

kv_pack/2

-callback kv_pack(state(), Tokens :: [erllama_nif:token_id()]) -> binary() | {error, term()}.

kv_pack/3

(optional)
-callback kv_pack(state(), Tokens :: [erllama_nif:token_id()], seq_id()) -> binary() | {error, term()}.

kv_unpack/2

-callback kv_unpack(state(), Bin :: binary()) -> ok | {error, term()}.

kv_unpack/3

(optional)
-callback kv_unpack(state(), Bin :: binary(), seq_id()) -> ok | {error, term()}.

load_adapter/2

(optional)
-callback load_adapter(state(), Path :: iodata()) -> {ok, term(), state()} | {error, term()}.

prefill/2

-callback prefill(state(), [erllama_nif:token_id()]) -> ok | {error, term()}.

sampler_free/1

(optional)
-callback sampler_free(sampler_ref()) -> ok | {error, term()}.

sampler_new/2

(optional)
-callback sampler_new(state(), sampler_opts()) -> {ok, sampler_ref()} | {error, term()}.

seq_clear/1

(optional)
-callback seq_clear(state()) -> ok | {error, term()}.

seq_rm/2

(optional)
-callback seq_rm(state(), seq_id()) -> ok | {error, term()}.

seq_rm_last/2

(optional)
-callback seq_rm_last(state(), NTokens :: pos_integer()) -> ok | {error, term()}.

seq_rm_last/3

(optional)
-callback seq_rm_last(state(), seq_id(), NTokens :: pos_integer()) -> ok | {error, term()}.

set_grammar/2

(optional)
-callback set_grammar(state(), Grammar :: binary() | undefined) -> {ok, state()} | {error, term()}.

step/2

(optional)
-callback step(state(), [{seq_id(), step_op()}]) -> {ok, [{seq_id(), step_result()}]} | {error, term()}.

terminate/1

-callback terminate(state()) -> ok.

thinking_signature/2

(optional)
-callback thinking_signature(state(), seq_id()) -> binary().

tokenize/2

-callback tokenize(state(), Text :: binary()) -> [erllama_nif:token_id()] | {error, term()}.

unload_adapter/2

(optional)
-callback unload_adapter(state(), Handle :: term()) -> {ok, state()} | {error, term()}.

verify/4

(optional)
-callback verify(state(),
                 PrefixTokens :: [erllama_nif:token_id()],
                 Candidates :: [erllama_nif:token_id()],
                 K :: pos_integer()) ->
                    {ok,
                     AcceptedCount :: non_neg_integer(),
                     NextToken :: erllama_nif:token_id() | eos,
                     NewState :: state()} |
                    {error, term()}.