From 5cd75b817748695db47624f31f9f14b5503a9d55 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Sun, 14 Feb 2021 10:25:23 -0800 Subject: [PATCH] feat: Initial statix assignment of types to expressions --- bin/extract_tests.xsh | 4 +- editor/Analysis.esv | 1 + tests/basic.spt | 10 +++ trans/analysis.str | 24 +++++++ trans/statics.stx | 154 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 186 insertions(+), 7 deletions(-) diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 79b2ca8..7cf131a 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -27,9 +27,9 @@ for path in TEST_LIST: if pfm: continue # skip examples that don't parse ntfm = re.search(r'\n\s*\]\].*?don.t.test', details) if ntfm: continue # explicit skip - em = re.search(r'\n\s*\]\]', details) + em = re.search(r'\n\]\]', details) if not em: continue - example = details[:em.start()+1] + example = details[:em.start()+1].replace('[[','').replace(']]','') expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) diff --git a/editor/Analysis.esv b/editor/Analysis.esv index 0667f1e..eb3197f 100644 --- a/editor/Analysis.esv +++ b/editor/Analysis.esv @@ -22,3 +22,4 @@ menus action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source) action: "Show analyzed AST" = debug-show-analyzed + action: "Show analyzed type" = debug-show-type diff --git a/tests/basic.spt b/tests/basic.spt index e3f4f69..075a4c3 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,6 +1,16 @@ module basic language fostr + +test hw1_type [[ +[[stream]] << [['Hello, world!']] +]] +run get-type on #1 to STREAM() +run get-type on #2 to STRING() +run get-type to STREAM() +/** writes +Hello, world!**/ + /** md Title: A whirlwind tour of fostr diff --git a/trans/analysis.str b/trans/analysis.str index 70919cc..858d1ec 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,5 +1,14 @@ module analysis +signature + sorts + TYPE + + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE + imports statixruntime @@ -51,3 +60,18 @@ rules // Debugging debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result) with filename := path ; result := sel + + // Extract the type assigned to a node by Statix + get-type: node -> type + where + // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error) + a := node]])>; + // Gets the type of the given node (or throws an error) + type := node]])> node + + fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail + + // Prints the analyzed type of a selection. + debug-show-type: (sel, _, _, path, projp) -> (filename, result) + with filename := path + ; result := sel diff --git a/trans/statics.stx b/trans/statics.stx index fa795fe..0099855 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -25,17 +25,18 @@ to be "OK" from the point of view of Statix: Then I reached the point at which the grammar was basically just ```SDF3 -// Start.TopLevel = -// Ex.Sequence = sq:Ex+ {layout: align-list sq} +// Start.TopLevel = +// Seq = +// Seq.Sequence = sq:Ex+ {layout(align-list sq)} // Ex.Terminated = <;> {! "\git docs/statix_start:syntax/fostr.sdf3" extract: start: TermEx.Terminate stop: (.*bracket.*) !} ``` -(The first three clauses are in comments because they approximate fostr's +(The first four clauses are in comments because they approximate fostr's grammar; it actually uses a few more sorts for sequences of -expressions, to acheive fostr's exact layout rules.) +expressions, to achieve fostr's exact layout rules.) This was the first point at which there were two different types that might need to be written to standard output (Int and String), and although of course @@ -46,15 +47,158 @@ that point since I knew it would be hopeless without statically typing fostr programs). So it was time to bite the bullet and add type checking via Statix to fostr. +The first step is to replace the simple assertion that any TopLevel +is OK with a constraint that its Seq must type properly, and an assignment of +that type to the top level node: +```statix +programOk(tl@TopLevel(seq)) :- {T} + type_Seq(seq) == T, + @tl.type := T. +``` +Of course, for this to even parse, we must have a definition of `type_Seq`: +```statix **/ +/** md */ +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE +/* **/ + // see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point programOk : Start - programOk(TopLevel(_)). + /** md +rules + type_Seq : Seq -> TYPE +``` + **/ + + type_LineSeq : LineSeq -> TYPE + + programOk(tl@TopLevel(seq)) :- {T} + type_LineSeq(seq) == T, + @tl.type := T. + + /** md +Now to type a Seq, we look to the syntax, and see that there are two +possibilities for what it might be: just an Ex, or a Sequence(_) of a +list of 'Ex's. For the first, Statix does not allow one sort to simply +"become" another, but the Spoofax infrastructure automatically inserts +"injection" constructors for us, in this case one named Ex2Seq. So the +first rule for `type_Seq` is straightforward: + +```statix + type_Seq(s@Ex2Seq(e)) = T : - + type_Ex(e) == T, + @s.type := T. +``` +where of course type_Ex needs its own declaration analogous to the above. + **/ + + type_Line : Line -> TYPE + + type_LineSeq(ls@Line2LineSeq(l)) = T :- + type_Line(l) == T, + @ls.type := T. + + type_OptTermEx : OptTermEx -> TYPE + + type_Line(l@OptTermEx2Line(ote)) = T :- + type_OptTermEx(ote) == T, + @l.type := T. + + type_Ex : Ex -> TYPE + + type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- + type_Ex(e) == T, + @ote.type :=T. + + /** md + +This brings us to the syntax rules for the basic expressions themselves, +which comprise almost all of the remaining fostr language constructs. +But first a mechanism suggested by Ivo Wilms to avoid repeating the node +type annotation in every rule: +```statix + **/ + + /** md */ + ty_Ex : Ex -> TYPE + + type_Ex(e) = ty@ty_Ex(e) :- + @e.type := ty. + /* **/ + + /** md +``` +At this stage in fostr's development, there was no difference between a +terminated and unterminated expression, so the typing rule for that +constructor was trivial: +```statix + ty_Ex(Terminated(e)) = ty_Ex(e). +``` + **/ + + type_TermEx: TermEx -> TYPE + + type_TermEx(te@Terminate(e)) = T :- + type_Ex(e) == T, + @te.type := T. + + /** md + +Now typing literals is straightforward: +```statix + **/ + + /** md */ + ty_Ex(Int(_)) = INT(). + ty_Ex(LitString(_)) = STRING(). + ty_Ex(e@Stream()) = STREAM(). + /* **/ + + /** md +``` + +Finally we get to the binary operators, and here we use the pattern found in +recent versions of the +"[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)" +example language and in the Fall 2020 TU-Delft class lecture on +[Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/). +This pattern lets us specify error messages. + +```statix + **/ + + /** md */ + ty_Ex(Sum(e1, e2)) = INT() :- + type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1, + type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2. + + ty_Ex(Gets(e1, e2)) = STREAM() :- {T} + type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1, + type_Ex(e2) == T. + + ty_Ex(To(e1, e2)) = T :- + type_Ex(e1) == T, + type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. + /* **/ + + /** md +``` + +### Using type annotations in transformation + +_Probably want to include stuff from analysis.str/ haskell.str here_ + + **/ rules // multi-file entry point