ExAequoBase.RegexParser (ExAequoBase v0.1.6)

View Source

Parses an input string according to a list of regexen and associated actions

iex(1)> rgxen = [ 
...(1)> {~r/\A\s+/, ""}, # ignored, sort of
...(1)> {~r/\A\d+/, &String.to_integer/1},
...(1)> {~r/\A(,),/, ","},
...(1)> {~r/\A(,)/, :comma},
...(1)> {~r/\A(.+?)(?=,)/}
...(1)> ]
...(1)> parse(rgxen, " 42alpha,beta,, , ")
["", 42, "alpha", :comma, "beta", ",", "", :comma, ""]

In case we want to eliminate these empty strings immediately, we can use the special atom :ignore as the action

iex(2)> rgxen = [ 
...(2)> {~r/\A\s+/, :ignore}, # matched whitespace is dropped from the result
...(2)> {~r/\A\d+/, &String.to_integer/1},
...(2)> {~r/\A(,),/, ","},
...(2)> {~r/\A(,)/, :comma},
...(2)> {~r/\A(.+?)(?=,)/}
...(2)> ]
...(2)> parse(rgxen, " 42alpha,beta,, , ")
[42, "alpha", :comma, "beta", ",", :comma]

It might be more efficient to tell the parser where the rest of the input is (here, by capturing it in a second group)

iex(3)> parse([{~r/\A(.)(.*)/}], "hello")
["h", "e", "l", "l", "o"]

We can also parse keywords

iex(4)> parse([{"hell"}, {"o"}], "hello")
["hell", "o"]

But the whole input string must be consumed; if no spec matches the remaining input, an error is raised

iex(5)> assert_raise(Error, fn -> parse([{"a"}], "ab") end)

Summary

Types

atoms()

@type atoms() :: [atom()]

binaries()

@type binaries() :: [binary()]

either(lt, rt)

@type either(lt, rt) :: ok_t(lt) | error_t(rt)

error_t()

@type error_t() :: {:error, binary()}

error_t(t)

@type error_t(t) :: {:error, t}

input_source_t()

@type input_source_t() :: Enumerable.t() | binary() | binaries()

maybe(t)

@type maybe(t) :: nil | t

natural()

@type natural() :: non_neg_integer()

numbered(t)

@type numbered(t) :: {t, number()}

numbered_line_t()

@type numbered_line_t() :: numbered(binary())

numbered_lines_t()

@type numbered_lines_t() :: [numbered_line_t()]

ok_t()

@type ok_t() :: {:ok, any()}

ok_t(t)

@type ok_t(t) :: {:ok, t}

pair_t()

@type pair_t() :: {any(), any()}

pair_t(t)

@type pair_t(t) :: {t, t}

pair_t(lt, rt)

@type pair_t(lt, rt) :: {lt, rt}

pairs_t()

@type pairs_t() :: [pair_t()]

pairs_t(t)

@type pairs_t(t) :: [pair_t(t)]

pairs_t(lt, rt)

@type pairs_t(lt, rt) :: [pair_t(lt, rt)]

parser_fn()

@type parser_fn() :: (binary() -> any())

reducer_result_t()

@type reducer_result_t() :: {:halt, error_t()} | {:cont, ok_t()}

result_fun_t()

@type result_fun_t() :: (any() -> result_t())

result_fun_t(t)

@type result_fun_t(t) :: (any() -> result_t(t))

result_t()

@type result_t() :: either(any(), binary())

result_t(t)

@type result_t(t) :: either(t, binary())

rgx_pair()

@type rgx_pair() :: {Regex.t(), ExAequoFn.NamedFn.t()}

rgx_pairs()

@type rgx_pairs() :: [rgx_pair()]

spec_t()

@type spec_t() :: pair_t(binary() | Regex.t(), any())

specs_t()

@type specs_t() :: [spec_t()]

stream_t()

@type stream_t() ::
  %IO.Stream{device: term(), line_or_bytes: term(), raw: term()}
  | %File.Stream{
      line_or_bytes: term(),
      modes: term(),
      node: term(),
      path: term(),
      raw: term()
    }

zero_fn_t()

@type zero_fn_t() :: (-> any())

zero_fn_t(t)

@type zero_fn_t(t) :: (-> t)

Functions

parse(rgxen, input)

@spec parse(specs_t(), binary()) :: list()