Thanks for the suggestion, Gabriel.
It is indeed less tiresome than I had imagined.
I attach the corresponding full code (with original camlp4 formulation in comment).
The minus operator is handled here before parsing; this is not a problem since such a parser is supposed to be called on very small strings in practice.
Jocelyn
PS: I’m still wondering whether some library and/or ppx-based generic (afap) implementation of Camlp4 stream parsers is possible...
8<—— Stream-based parser for simple arithmetic expressions (with non-negative integers)
(** Name of a variable appearing in an expression. *)
type ident = string

(** Value carried by a constant. *)
type value = int

(** Abstract syntax tree of an arithmetic expression. *)
type t =
  | EConst of value  (** Constants *)
  | EVar of ident  (** Input, output or local variable *)
  | EBinop of string * t * t
      (** Binary operation: operator symbol, left operand, right operand *)
(** Symbols registered as keywords with the lexer: the four arithmetic
    operators followed by the two parentheses. *)
let keywords =
  let operators = [ "+"; "-"; "*"; "/" ] in
  let parentheses = [ "("; ")" ] in
  operators @ parentheses
(** [mk_binary_minus s] replaces every ['-'] of [s] by [" - "], so that
    ["a-1"] becomes ["a - 1"].  Presumably this keeps the lexer from reading
    a minus sign glued to a digit as the sign of a negative literal. *)
let mk_binary_minus s = String.concat " - " (String.split_on_char '-' s)
(** [lexer s] turns the string [s] into a stream of [Genlex] tokens, using
    [keywords] as the keyword table.  Minus signs are isolated with
    [mk_binary_minus] before the string is handed to the lexer. *)
let lexer s =
  let chars = Stream.of_string (mk_binary_minus s) in
  Genlex.make_lexer keywords chars
open Genlex
(* Atomic expressions: an integer constant, a variable, or a parenthesized
   expression.  Original camlp4 formulation:

   let rec p_exp0 = parser
     | [< 'Int n >] -> EConst n
     | [< 'Ident i >] -> EVar i
     | [< 'Kwd "("; e=p_exp ; 'Kwd ")" >] -> e

   The first token is always consumed.  [Stream.Failure] is raised when the
   stream is empty, when that token cannot start an atom, or when the closing
   parenthesis is missing. *)
let rec p_exp0 s =
  (* Require a closing parenthesis after the already parsed [e]. *)
  let close_paren e =
    match Stream.peek s with
    | Some (Kwd ")") ->
        Stream.junk s;
        e
    | Some _ | None -> raise Stream.Failure
  in
  match Stream.next s with
  | Kwd "(" -> close_paren (p_exp s)
  | Ident i -> EVar i
  | Int n -> EConst n
  | Kwd _ | Float _ | String _ | Char _ -> raise Stream.Failure
(* Multiplicative level: parse one atom with [p_exp0], then hand it to
   [p_exp2], which looks for a following multiplication or division.
   Original camlp4 formulation:

   and p_exp1 = parser
     | [< e1=p_exp0 ; rest >] -> p_exp2 e1 rest *)
and p_exp1 s = p_exp2 (p_exp0 s) s
(* [p_exp2 e1 s] continues a multiplicative chain whose left operand [e1]
   has already been parsed.  Original camlp4 formulation:

   and p_exp2 e1 = parser
     | [< 'Kwd "*"; e2=p_exp1 >] -> EBinop("*", e1, e2)
     | [< 'Kwd "/"; e2=p_exp1 >] -> EBinop("/", e1, e2)
     | [< >] -> e1

   The code below deliberately departs from that formulation.  Recursing
   through [p_exp1] for the right operand makes the operators
   right-associative, so that "8/4/2" is read as 8/(4/2).  Here the right
   operand is a single atom ([p_exp0]) and the operation built so far becomes
   the left operand of the next one, which gives the usual left-associative
   reading (8/4)/2.

   Returns [e1] unchanged, without consuming anything, when the next token
   is neither a multiplication nor a division sign. *)
and p_exp2 e1 s =
  match Stream.peek s with
  | Some (Kwd (("*" | "/") as op)) ->
      Stream.junk s;
      let e2 = p_exp0 s in
      (* Fold to the left, then look for a further operator. *)
      p_exp2 (EBinop (op, e1, e2)) s
  | _ -> e1
(* Entry point of the expression grammar (additive level): parse one
   multiplicative term with [p_exp1], then let [p_exp3] look for a following
   addition or subtraction.  Original camlp4 formulation:

   and p_exp = parser
     | [< e1=p_exp1 ; rest >] -> p_exp3 e1 rest *)
and p_exp s = p_exp3 (p_exp1 s) s
(* [p_exp3 e1 s] continues an additive chain whose left operand [e1] has
   already been parsed.  Original camlp4 formulation:

   and p_exp3 e1 = parser
     | [< 'Kwd "+"; e2=p_exp >] -> EBinop("+", e1, e2)
     | [< 'Kwd "-"; e2=p_exp >] -> EBinop("-", e1, e2)
     | [< >] -> e1

   The code below deliberately departs from that formulation.  Recursing
   through [p_exp] for the right operand makes the operators
   right-associative, so that "10-2-3" is read as 10-(2-3).  Here the right
   operand is a single multiplicative term ([p_exp1]) and the operation built
   so far becomes the left operand of the next one, which gives the usual
   left-associative reading (10-2)-3.

   Returns [e1] unchanged, without consuming anything, when the next token
   is neither a plus nor a minus sign. *)
and p_exp3 e1 s =
  match Stream.peek s with
  | Some (Kwd (("+" | "-") as op)) ->
      Stream.junk s;
      let e2 = p_exp1 s in
      (* Fold to the left, then look for a further operator. *)
      p_exp3 (EBinop (op, e1, e2)) s
  | _ -> e1
(** [parse s] parses the whole string [s] as an arithmetic expression.

    The complete input must be consumed: the token stream is required to be
    empty once the expression has been read, so trailing tokens (as in
    ["1 2"] or ["1 )"]) are rejected instead of being silently dropped.

    @raise Stream.Failure if [s] does not start with a well-formed
    expression, or if extra tokens follow the expression. *)
let parse s =
  let tokens = lexer s in
  let e = p_exp tokens in
  (* [Stream.empty] raises [Stream.Failure] when a token is left over. *)
  Stream.empty tokens;
  e
Le 25 juil. 2019 à 13:42, Gabriel Scherer a écrit :
> Hi,
>
> The parser from https://github.com/jserot/lascar/blob/master/src/lib/fsm_expr.ml seems fairly trivial, have you considered just rewriting it to use the functions of the Stream primitives directly?
>
> For example, roughly (I have not tried to type-check or test the code),
>
> let rec aux = parser
> | [< 'Int n when n<0; t=aux >] -> [< 'Kwd "-"; 'Int (-n); t >]
> | [< 'h; t=aux >] -> [< 'h; t >]
> | [< >] -> [< >] in
>
> would become
>
> let aux s =
> let next = ref [] in
> Stream.from @@ fun _ ->
> match !next with
> | tok::toks -> next := toks; Some tok
> | [] -> match Stream.next with
> | exception Stream.Failure -> None
> | Int n when n < 0 ->
> next := [Int (-n)];
> Some (Kwd "-")
> | tok -> Some tok
>
> and
>
> let rec p_exp0 = parser
> | [< 'Int n >] -> EConst n
> | [< 'Ident i >] -> EVar i
> | [< 'Kwd "("; e=p_exp ; 'Kwd ")" >] -> e
>
> becomes
>
> let rec p_exp0 s = match Stream.next s with
> | Int n -> EConst n
> | Ident i -> EVar i
> | Kwd "(" ->
> let e = p_exp s in
> match Stream.peek s with
> | Some (Kwd ")") -> Stream.junk s; e
> | _ -> raise Stream.Failure
>
> This is not exactly exciting code to write, but it's not a lot of work either for such a simple grammar.
>
> On Thu, Jul 25, 2019 at 12:28 PM Jocelyn Sérot wrote:
> Hi Daniil,
>
> Thanks for the example. It clearly shows how to embed a Menhir-specified parser into an existing program.
>
> I still think, however, that using Menhir for parsing arithmetic expressions is a bit overkill.
>
> I’m having a look at Angstrom (and all the other parser combinator libs cited on the corresp. page).
> It seems simpler.
>
> Jocelyn
>
> Le 24 juil. 2019 à 17:31, Daniil Baturin a écrit :
>
> > Hi Jocelyn,
> >
> > I've completed the first version of my project, so now I can start
> > looking into this again!
> >
> > There's a third option: parser combinators like angstrom.
> > My experience with Menhir is very positive though. After initial
> > struggle, I came to like its new incremental API and declarative error
> > reporting.
> >
> > Here's my parser for an extended BNF:
> > Menhir grammar:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/bnf_parser.mly
> > Parser driver that feeds it tokens:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/parse_bnf.ml
> > Error messages:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/bnf_parser.messages
> > Error message module build:
> > https://github.com/dmbaturin/bnfgen/blob/master/src/dune#L6-L8
> >
> > On 7/24/19 10:10 PM, Jocelyn Sérot wrote:
> >> Hi Daniil (and everyone interested by the subject),
> >>
> >> Did you have a closer look at this ?
> >>
> >> I’m still hesitating between these three approaches for replacing the implementation of the small arithm expression parser used in Lascar [1] :
> >>
> >> i. rewrite it using the basic fns provided by the Stream library (pro: no additional dependency, cons: not so trivial..)
> >>
> >> ii. replace camlp4 by camlp5 (pro: straightforward, cons: long term maintainability of camlp5 (?))
> >>
> >> iii. rewrite it using ocamllex/menhir and embed it in the main code (pro: « standard » soon; cons: a bit heavy)
> >>
> >> Jocelyn
> >>
> >> [1] https://github.com/jserot/lascar/blob/master/src/lib/fsm_expr.ml, lines 70–112
> >>
> >> Le 2 juil. 2019 à 11:25, Daniil Baturin a écrit :
> >>
> >>> Hi Jocelyn,
> >>> Camlp5 is still sort of maintained, but I don't think it's going to be
> >>> developed beyond compatibility updates.
> >>> For syntax extensions, everyone is switching to PPX.
> >>>
> >>> From a quick look, it seems like the only bit of camlp4 you use is
> >>> stream expressions.
> >>> This is one of the things PPX can't do (on purpose, since it doesn't
> >>> allow _arbitrary_ extensions),
> >>> but I don't think just using streams directly is going to make code much
> >>> longer.
> >>>
> >>> Or I missed some other camlp4 bits?
> >>>
> >>> I'm ready to work on a patch if you are open to it.
> >>>
> >>> On 7/2/19 1:44 PM, Jocelyn Sérot wrote:
> >>>> Le 29 juin 2019 à 17:15, Daniil Baturin a écrit :
> >>>>
> >>>>> Perhaps we should make some coordinated effort to help them.
> >>>>> I've just sent a pull request to the ocamldot maintainer that enables
> >>>>> the graphviz files parsing and printing modules
> >>>>> to build and work with 4.08. The GTK parts have their own issues.
> >>>>> Next I'm going to look into LASCAR/RFSM (packages that interest me first ;).
> >>>>>
> >>>> Hi Daniil,
> >>>>
> >>>> I’ve been thinking of removing the dependency of Lascar and RFSM on camlp4 for a while.
> >>>> Is switching to CamlP5 a good alternative ?
> >>>>
> >>>> Jocelyn
> >>>>
> >>>>
> >>>
> >
> >
>
>