Module tok

Data Types

tokenizer()

abstract datatype: tokenizer()

Function Index

count_tokens/2
decode/2
encode/2
encode/3
encode_batch/2
encode_batch/3
load/1
vocab_size/1

Function Details

count_tokens/2

count_tokens(Tok::tokenizer(), Text::binary()) -> non_neg_integer()

decode/2

decode(Tok::tokenizer(), Ids::[integer()]) -> binary()

encode/2

encode(Tok::tokenizer(), Text::binary()) -> {InputIds::binary(), AttentionMask::binary(), TokenTypeIds::binary()}

encode/3

encode(Tok::tokenizer(), Text::binary(), Opts::#{add_special_tokens => boolean()}) -> {InputIds::binary(), AttentionMask::binary(), TokenTypeIds::binary()}

encode_batch/2

encode_batch(Tok::tokenizer(), Texts::[binary()]) -> [{binary(), binary(), binary()}]

encode_batch/3

encode_batch(Tok::tokenizer(), Texts::[binary()], Opts::#{add_special_tokens => boolean()}) -> [{binary(), binary(), binary()}]

load/1

load(Path::file:filename()) -> {ok, tokenizer()} | {error, term()}

vocab_size/1

vocab_size(Tok::tokenizer()) -> integer()


Generated by EDoc