PyreAst.TaglessFinal
This module provides tagless-final interfaces for the syntax of Python.
Tagless-final style, also known as object algebra in OOP contexts, is a programming techinque of embedding domain-specific languages in a typed functional host language. Reading the associated literature to get a deeper understanding of the concept is definitely encouraged. But we will also include a short explanation here in order to facilitate basic understandings of the APIs.
Let's consider a small DSL containing both boolean and integer values as our running example. Conventional wisdom suggests the following patterns to construct and process the syntax of a language:
(* Syntax of DSL *)
type value = Int of int | Bool of bool
type expression =
| Constant of value
| Add of expression * expression
| IsEqual of expression * expression
(* An evaluator of the DSL, as an example of a downstream processing logic of the DSL's
syntax. *)
let rec eval = function
| Constant value -> value
| Add (lhs, rhs) -> (
match (eval lhs, eval rhs) with
| Int lhs, Int rhs -> Int (lhs + rhs)
| _ -> failwith "Runtime type error")
| IsEqual (lhs, rhs) -> (
match (eval lhs, eval rhs) with
| Int lhs, Int rhs -> Bool (lhs = rhs)
| Bool lhs, Bool rhs -> Bool (lhs = rhs)
| _ -> failwith "Runtime type error")
(* The program constructs an explicit AST for the DSL, then process it. *)
let construct_ast () =
IsEqual
( Constant (Bool true),
IsEqual (Add (Constant (Int 2), Constant (Int 3)), Constant (Int 4)) )
let _ =
let syntax_tree = construct_ast () in
eval syntax_tree
Note how the program first construct an explicit structure syntax_tree
first, and then send this structure to downstream processing logic so it can get pattern-matched on. If the same logic gets re-written in the tagless-final style, it would roughly look like this:
(* This definition is still needed, as the result type of [eval]. *)
type value = Int of int | Bool of bool
(* Syntax of DSL. Note the lack of concrete variant/record definitions -- syntax are 100%
defined as a collection of free-functions. *)
type 'e expression = {
constant : value -> 'e;
add : 'e -> 'e -> 'e;
is_equal : 'e -> 'e -> 'e;
}
(* An evaluator of the DSL. Note the lack of explicit pattern match on the syntax tree (which
is why this style is named "tagless"). *)
let eval : value expression =
let constant v = v in
let add lhs rhs =
match (lhs, rhs) with
| Int lhs, Int rhs -> Int (lhs + rhs)
| _ -> failwith "Runtime type error"
in
let is_equal lhs rhs =
match (lhs, rhs) with
| Int lhs, Int rhs -> Bool (lhs = rhs)
| Bool lhs, Bool rhs -> Bool (lhs = rhs)
| _ -> failwith "Runtime type error"
in
{ constant; add; is_equal }
(* The program does not constructs an explicit AST for the DSL. Processing of the DSL is
performed exactly when the DSL gets constructed. *)
let construct_and_eval { constant; add; is_equal } =
is_equal (constant (Bool true))
(is_equal (add (constant (Int 2)) (constant (Int 3))) (constant (Int 4)))
let _ = construct_and_eval eval
Note how the intermediate step of constructing an explicit syntax_tree
structure is completely eliminated -- we specify the downstream processing logic on top of the syntatical structure of the DSL directly as free functions, and invoke them immediately when the corrsponding DSL structure is constructed. In other words, the processing logic somehow gets "dependency injected" into the construction logic.
This seemlying unintuitive style of programming actually provides two main benefits over the conventional alternative:
expression
in our DSL to syntactically rule out terms that cannot statically type check:(* The intention here is to use the first parameter as a phantom type, i.e. the actual
definition of this type is going to be [type (_, 'a) typed = 'a] *)
type ('phantom, 'a) typed
type 'v value = { intv : int -> (int, 'v) typed; boolv : bool -> (bool, 'v) typed }
type ('e, 'v) expression = {
constant : 't. ('t, 'v) typed -> ('v, 'e) typed;
(* Note how we only allow adding terms with [int] types. *)
add : (int, 'e) typed -> (int, 'e) typed -> (int, 'e) typed;
(* Note how we only allow comparing terms with the same type. *)
is_equal : 't. ('t, 'e) typed -> ('t, 'e) typed -> (bool, 'e) typed;
}
Doing the same thing with the non-tagless-final style requires advanced language features such as GADT, which is not always available in the host language. Besides, GADT often has its own set of nuisances even in a host language that supports it. Granted, such a flexibility is not actively exploited by this library. But the point that tagless-final style allows us to pursue this potential direction without too much pain still remains.
The main type this module offers, TaglessFinal.t
, works just like the expression
type appeared in the above example: it is nothing more than a collection of free functions on which downstream syntax processors can be defined. Once it is constructed with TaglessFinal.make
, one can then pass it onto various paring APIs in Parser.TaglessFinal
, which serves simliar purpose as the construct_and_eval
function in our running example.
Keeping the structure of TaglessFinal.t
fully authentic to Python's official ast
module is an explicit design goal for this library. Even if Python's own ast
representation may exhibit some obvious issues or inconsistencies, it is not the job of this module to fix them, for both philosophical and techinical reasons. If certain design of ast
is undesirable, the recommended way to address those is to write downstream conversion logic.
module Position : sig ... end
A position is a pair of integers representing line number and column numbers.
module Location : sig ... end
A location is a pair of Position.t
representing a range for a given token.
module Identifier : sig ... end
This module provide a type that represents Python identifiers.
module Constant : sig ... end
This module provides a type that represents Python constant value.
module ExpressionContext : sig ... end
This module provides a type that represents Python expression contexts.
module BooleanOperator : sig ... end
This module provides a type that represents Python boolean operators.
module BinaryOperator : sig ... end
This module provides a type that represents Python numerical binary operators.
module UnaryOperator : sig ... end
This module provides a type that represents Python unary operators.
module ComparisonOperator : sig ... end
This module provides a type that represents Python comparison operators.
module Comprehension : sig ... end
This module provides a type that represents Python comprehension structures.
module Keyword : sig ... end
This module provides a type that represents keyword arguments at each callsite.
module Argument : sig ... end
This module provides a type that represents single parameter at each function definition site.
module Arguments : sig ... end
This module provides a type that represents a parameter list at each function definition site.
module Expression : sig ... end
This module provides a type that represents a Python expression.
module WithItem : sig ... end
This module provides a type that represents a Python with
item.
module ImportAlias : sig ... end
This module provides a type that represents a Python import
item.
module TypeParam : sig ... end
This module provides a type that represents a Python type parameter item. See PEP 695.
module ExceptionHandler : sig ... end
This module provides a type that represents a Python exception handler.
module MatchCase : sig ... end
This module provides a type that represents a branch of the Python match
statement. See PEP 622.
module Pattern : sig ... end
This module provides a type that represents a pattern for a given match
branch. See PEP 622.
module Statement : sig ... end
This module provides a type that represents a Python statement.
module TypeIgnore : sig ... end
This module provides a type that represents a Python type ignore item.
module Module : sig ... end
This module provides a type that represents a Python module.
module FunctionType : sig ... end
This module provides a type that represents a Python function type signature. See Parser.TaglessFinal.parse_function_type
.
type ('argument, 'arguments, 'binary_operator, 'boolean_operator, 'comparison_operator, 'comprehension, 'constant, 'exception_handler, 'expression, 'expression_context, 'function_type, 'identifier, 'import_alias, 'keyword, 'location, 'match_case, 'module_, 'pattern, 'position, 'statement, 'type_ignore, 'type_param, 'unary_operator, 'with_item)
t =
private {
argument : ('expression, 'identifier, 'location, 'argument) Argument.t;
arguments : ('argument, 'expression, 'arguments) Arguments.t;
binary_operator : 'binary_operator BinaryOperator.t;
boolean_operator : 'boolean_operator BooleanOperator.t;
comparison_operator : 'comparison_operator ComparisonOperator.t;
comprehension : ('expression, 'comprehension) Comprehension.t;
constant : 'constant Constant.t;
exception_handler : ('expression,
'identifier,
'location,
'statement,
'exception_handler)
ExceptionHandler.t;
expression : ('arguments,
'binary_operator,
'boolean_operator,
'comparison_operator,
'comprehension,
'constant,
'expression_context,
'identifier,
'keyword,
'location,
'unary_operator,
'expression)
Expression.t;
expression_context : 'expression_context ExpressionContext.t;
function_type : ('expression, 'function_type) FunctionType.t;
identifier : 'identifier Identifier.t;
import_alias : ('identifier, 'location, 'import_alias) ImportAlias.t;
keyword : ('expression, 'identifier, 'location, 'keyword) Keyword.t;
location : ('position, 'location) Location.t;
match_case : ('expression, 'pattern, 'statement, 'match_case) MatchCase.t;
module_ : ('statement, 'type_ignore, 'module_) Module.t;
pattern : ('constant, 'expression, 'identifier, 'location, 'pattern) Pattern.t;
position : 'position Position.t;
statement : ('import_alias,
'arguments,
'binary_operator,
'exception_handler,
'expression,
'identifier,
'keyword,
'location,
'match_case,
'type_param,
'with_item,
'statement)
Statement.t;
type_ignore : 'type_ignore TypeIgnore.t;
type_param : ('expression, 'identifier, 'location, 'type_param) TypeParam.t;
unary_operator : 'unary_operator UnaryOperator.t;
with_item : ('expression, 'with_item) WithItem.t;
}
val make :
argument:('a, 'b, 'c, 'd) Argument.t ->
arguments:('d, 'a, 'e) Arguments.t ->
binary_operator:'f BinaryOperator.t ->
boolean_operator:'g BooleanOperator.t ->
comparison_operator:'h ComparisonOperator.t ->
comprehension:('a, 'i) Comprehension.t ->
constant:'j Constant.t ->
exception_handler:('a, 'b, 'c, 'k, 'l) ExceptionHandler.t ->
expression:('e, 'f, 'g, 'h, 'i, 'j, 'm, 'b, 'n, 'c, 'o, 'a) Expression.t ->
expression_context:'m ExpressionContext.t ->
function_type:('a, 'p) FunctionType.t ->
identifier:'b Identifier.t ->
import_alias:('b, 'c, 'q) ImportAlias.t ->
keyword:('a, 'b, 'c, 'n) Keyword.t ->
location:('r, 'c) Location.t ->
match_case:('a, 's, 'k, 't) MatchCase.t ->
module_:('k, 'u, 'v) Module.t ->
pattern:('j, 'a, 'b, 'c, 's) Pattern.t ->
position:'r Position.t ->
statement:('q, 'e, 'f, 'l, 'a, 'b, 'n, 'c, 't, 'w, 'x, 'k) Statement.t ->
type_ignore:'u TypeIgnore.t ->
type_param:('a, 'b, 'c, 'w) TypeParam.t ->
unary_operator:'o UnaryOperator.t ->
with_item:('a, 'x) WithItem.t ->
unit ->
('d,
'e,
'f,
'g,
'h,
'i,
'j,
'l,
'a,
'm,
'p,
'b,
'q,
'n,
'c,
't,
'v,
's,
'r,
'k,
'u,
'w,
'o,
'x)
t
Constructor of t
.