Data Types

tokenizer()

abstract datatype: tokenizer()

An opaque handle to a loaded tokenizer, returned by load/1 and accepted by every other function in this module.
Function Index

| count_tokens/2 | Return the number of tokens Text encodes to. |
| decode/2 | Convert a list of token ids back into text. |
| encode/2 | Encode Text into input ids, an attention mask, and token type ids. |
| encode/3 | Like encode/2, but with encoding options. |
| encode_batch/2 | Encode a list of texts in a single call. |
| encode_batch/3 | Like encode_batch/2, but with encoding options. |
| load/1 | Load a tokenizer from a file. |
| vocab_size/1 | Return the size of the tokenizer's vocabulary. |

Function Details
count_tokens(Tok::tokenizer(), Text::binary()) -> non_neg_integer()
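Returns the number of tokens that encoding Text would produce. A minimal usage sketch; the module name tokenizer and the file path are illustrative assumptions, not part of the spec (see load/1 below):

    %% Assumed module name and path, for illustration only.
    {ok, Tok} = tokenizer:load("tokenizer.json"),
    NumTokens = tokenizer:count_tokens(Tok, <<"Hello, world!">>),
    %% The exact count depends on the loaded vocabulary.
    true = NumTokens >= 0.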
decode(Tok::tokenizer(), Ids::[integer()]) -> binary()
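Converts a list of token ids back into text. A sketch; the ids below are hypothetical and only meaningful for the vocabulary that produced them:

    %% Hypothetical ids; real values come from a prior encoding.
    Text = tokenizer:decode(Tok, [101, 7592, 1010, 2088, 102]),
    true = is_binary(Text).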
encode(Tok::tokenizer(), Text::binary()) -> {InputIds::binary(), AttentionMask::binary(), TokenTypeIds::binary()}
encode(Tok::tokenizer(), Text::binary(), Opts::#{add_special_tokens => boolean()}) -> {InputIds::binary(), AttentionMask::binary(), TokenTypeIds::binary()}
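encode/2 returns the encoded input ids, attention mask, and token type ids; encode/3 additionally accepts add_special_tokens, which controls whether marker tokens (for example, [CLS] and [SEP] in BERT-style tokenizers) are inserted. A sketch, with the same assumed module name as above:

    {InputIds, AttentionMask, TokenTypeIds} =
        tokenizer:encode(Tok, <<"Hello, world!">>),
    %% The same text, encoded without special tokens:
    {RawIds, _Mask, _Types} =
        tokenizer:encode(Tok, <<"Hello, world!">>,
                         #{add_special_tokens => false}).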
encode_batch(Tok::tokenizer(), Texts::[binary()]) -> [{binary(), binary(), binary()}]
encode_batch(Tok::tokenizer(), Texts::[binary()], Opts::#{add_special_tokens => boolean()}) -> [{binary(), binary(), binary()}]
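Encodes each text in Texts and returns one {InputIds, AttentionMask, TokenTypeIds} tuple per input, in order. Sketch under the same assumptions:

    Batch = tokenizer:encode_batch(Tok, [<<"first text">>, <<"second text">>]),
    [{Ids1, _, _}, {Ids2, _, _}] = Batch.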
load(Path::file:filename()) -> {ok, tokenizer()} | {error, term()}
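Loads a tokenizer definition from Path and returns an opaque tokenizer() handle, or {error, Reason} on failure. The path below is illustrative; the spec only requires a file:filename():

    %% do_something_with/1 is a hypothetical caller.
    case tokenizer:load("priv/tokenizer.json") of
        {ok, Tok} ->
            do_something_with(Tok);
        {error, Reason} ->
            error({tokenizer_load_failed, Reason})
    end.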
vocab_size(Tok::tokenizer()) -> integer()
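Returns the number of entries in the tokenizer's vocabulary. Sketch:

    Size = tokenizer:vocab_size(Tok),
    io:format("vocabulary size: ~p~n", [Size]).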
Generated by EDoc