From 904f651897c90a864d35cfc724ad3787c7d87357 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Thu, 11 Feb 2021 10:02:43 -0800 Subject: [PATCH 1/6] feat: Add syntax for string literals Also rudimentary code generation. The difficulty is that for Haskell generation, we need to know whether an expression is a string or in to send it to standard output. So we will need to begin implementation of a type system for fostr. --- syntax/fostr.sdf3 | 21 +++++++++++++++------ tests/hw.fos | 1 + trans/haskell.str | 12 ++++++++++++ trans/javascript.str | 10 ++++++++++ trans/python.str | 1 + 5 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 tests/hw.fos diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 4a39131..7805b3f 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -8,6 +8,14 @@ context-free start-symbols Start +lexical sorts + + STRING_LITERAL + +lexical syntax + + STRING_LITERAL = ~[\']* + context-free sorts Start LineSeq Line OptTermEx TermExLst TermEx Ex @@ -29,13 +37,14 @@ context-free syntax TermEx.Terminate = <;> - Ex.Int = INT - Ex.Stream = - Ex.Sum = [[Ex] + [Ex]] {left} - Ex.Gets = [[Ex] << [Ex]] {left} - Ex.To = [[Ex] >> [Ex]] {left} + Ex.Int = INT + Ex.LitString = <''> + Ex.Stream = + Ex.Sum = < + > {left} + Ex.Gets = [[Ex] << [Ex]] {left} + Ex.To = [[Ex] >> [Ex]] {left} - Ex = <()> {bracket} + Ex = <()> {bracket} context-free priorities diff --git a/tests/hw.fos b/tests/hw.fos new file mode 100644 index 0000000..90fc0d4 --- /dev/null +++ b/tests/hw.fos @@ -0,0 +1 @@ +stream << 'Hello, world!' diff --git a/trans/haskell.str b/trans/haskell.str index eae335e..63c969e 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -22,6 +22,8 @@ rules hs: Stream() -> ("StdIO", "") hs: Int(x) -> (x, "") + hs: LitString(x) + -> ($["[x]"], "") hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") @@ -35,7 +37,17 @@ rules hs: Terminate((c,p)) -> ($[[c];;], p) hs: Sequence(l) -> (l, l) + /* Characters we need to escape in Haskell string constants */ + Hascape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... */ + Hascape: [ 0 | cs ] -> ['\', '0' | cs ] + Hascape: [ 7 | cs ] -> ['\', 'a' | cs ] // Alert + Hascape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Hascape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + HaskellEscape = Escape <+ Hascape haskell = bottomup(try(hs)) diff --git a/trans/javascript.str b/trans/javascript.str index 605b46a..d0d97fe 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -13,13 +13,23 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x + js: LitString(x) -> $['[x]'] js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] js: Terminate(x) -> x js: Sequence(l) -> l + /* Characters we need to escape in Javascript string constants */ + Jscape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... */ + Jscape: [ 0 | cs ] -> ['\', '0' | cs ] + Jscape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Jscape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + JavaEscape = Escape <+ Jscape javascript = bottomup(try(js)) diff --git a/trans/python.str b/trans/python.str index 238c006..d745cee 100644 --- a/trans/python.str +++ b/trans/python.str @@ -15,6 +15,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x + py: LitString(x) -> $[r'[x]'] py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] -- 2.34.1 From b3f9cdf372c7cf5a3bbc8590d608ce7e849c3c4b Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Fri, 12 Feb 2021 17:08:55 -0800 Subject: [PATCH 2/6] docs: Add statics development documentation Hopefully the new section will be helpful, given that it can be a bit confusing to get started with Statix. With this commit, the background is established and the stage is set to dive into type checking. --- mkdocs.yml | 1 + trans/statics.stx | 48 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 6726b34..e316a98 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,6 +2,7 @@ site_name: fostr language nav: - README.md - tests/basic.md +- trans/statics.md - implementation.md plugins: diff --git a/trans/statics.stx b/trans/statics.stx index 7772698..fa795fe 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -2,7 +2,53 @@ module statics imports signatures/fostr-sig -// see docs/implementation.md for details on how to switch to multi-file analysis +/** md +Title: Adding Program Analysis with Statix + +## Development of fostr static analysis + +This section is more documentation of Spoofax in general and Statix +in particular than of fostr itself, but is being maintained here in case +it could be either helpful to someone getting started with Statix or +helpful in understanding how the static characteristics of fostr were designed. + +As mentioned in the [Overview](../README.md), I don't like to program and a +corollary of that is never to use a facility unless/until there's a need for +it. So the first few rudimentary passes at fostr simply declared every program +to be "OK" from the point of view of Statix: +```statix +{! "\git docs/statix_start:trans/statics.stx" extract: + start: programOk + stop: (.*TopLevel.*) +!} +``` + +Then I reached the point at which the grammar was basically just +```SDF3 +// Start.TopLevel = +// Ex.Sequence = sq:Ex+ {layout: align-list sq} +// Ex.Terminated = <;> +{! "\git docs/statix_start:syntax/fostr.sdf3" extract: + start: TermEx.Terminate + stop: (.*bracket.*) +!} +``` +(The first three clauses are in comments because they approximate fostr's +grammar; it actually uses a few more sorts for sequences of +expressions, to acheive fostr's exact layout rules.) + +This was the first point at which there were two different types that might +need to be written to standard output (Int and String), and although of course +the dynamically-typed Python and Javascript code generated dealt with both fine, +the Haskell code needed to differ depending on the +type of the item written (and I hadn't even started OCaml code generation at +that point since I knew it would be hopeless without statically typing fostr +programs). + +So it was time to bite the bullet and add type checking via Statix to fostr. +**/ + +// see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point -- 2.34.1 From 5cd75b817748695db47624f31f9f14b5503a9d55 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Sun, 14 Feb 2021 10:25:23 -0800 Subject: [PATCH 3/6] feat: Initial statix assignment of types to expressions --- bin/extract_tests.xsh | 4 +- editor/Analysis.esv | 1 + tests/basic.spt | 10 +++ trans/analysis.str | 24 +++++++ trans/statics.stx | 154 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 186 insertions(+), 7 deletions(-) diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 79b2ca8..7cf131a 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -27,9 +27,9 @@ for path in TEST_LIST: if pfm: continue # skip examples that don't parse ntfm = re.search(r'\n\s*\]\].*?don.t.test', details) if ntfm: continue # explicit skip - em = re.search(r'\n\s*\]\]', details) + em = re.search(r'\n\]\]', details) if not em: continue - example = details[:em.start()+1] + example = details[:em.start()+1].replace('[[','').replace(']]','') expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) diff --git a/editor/Analysis.esv b/editor/Analysis.esv index 0667f1e..eb3197f 100644 --- a/editor/Analysis.esv +++ b/editor/Analysis.esv @@ -22,3 +22,4 @@ menus action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source) action: "Show analyzed AST" = debug-show-analyzed + action: "Show analyzed type" = debug-show-type diff --git a/tests/basic.spt b/tests/basic.spt index e3f4f69..075a4c3 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,6 +1,16 @@ module basic language fostr + +test hw1_type [[ +[[stream]] << [['Hello, world!']] +]] +run get-type on #1 to STREAM() +run get-type on #2 to STRING() +run get-type to STREAM() +/** writes +Hello, world!**/ + /** md Title: A whirlwind tour of fostr diff --git a/trans/analysis.str b/trans/analysis.str index 70919cc..858d1ec 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,5 +1,14 @@ module analysis +signature + sorts + TYPE + + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE + imports statixruntime @@ -51,3 +60,18 @@ rules // Debugging debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result) with filename := path ; result := sel + + // Extract the type assigned to a node by Statix + get-type: node -> type + where + // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error) + a := node]])>; + // Gets the type of the given node (or throws an error) + type := node]])> node + + fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail + + // Prints the analyzed type of a selection. + debug-show-type: (sel, _, _, path, projp) -> (filename, result) + with filename := path + ; result := sel diff --git a/trans/statics.stx b/trans/statics.stx index fa795fe..0099855 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -25,17 +25,18 @@ to be "OK" from the point of view of Statix: Then I reached the point at which the grammar was basically just ```SDF3 -// Start.TopLevel = -// Ex.Sequence = sq:Ex+ {layout: align-list sq} +// Start.TopLevel = +// Seq = +// Seq.Sequence = sq:Ex+ {layout(align-list sq)} // Ex.Terminated = <;> {! "\git docs/statix_start:syntax/fostr.sdf3" extract: start: TermEx.Terminate stop: (.*bracket.*) !} ``` -(The first three clauses are in comments because they approximate fostr's +(The first four clauses are in comments because they approximate fostr's grammar; it actually uses a few more sorts for sequences of -expressions, to acheive fostr's exact layout rules.) +expressions, to achieve fostr's exact layout rules.) This was the first point at which there were two different types that might need to be written to standard output (Int and String), and although of course @@ -46,15 +47,158 @@ that point since I knew it would be hopeless without statically typing fostr programs). So it was time to bite the bullet and add type checking via Statix to fostr. +The first step is to replace the simple assertion that any TopLevel +is OK with a constraint that its Seq must type properly, and an assignment of +that type to the top level node: +```statix +programOk(tl@TopLevel(seq)) :- {T} + type_Seq(seq) == T, + @tl.type := T. +``` +Of course, for this to even parse, we must have a definition of `type_Seq`: +```statix **/ +/** md */ +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE +/* **/ + // see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point programOk : Start - programOk(TopLevel(_)). + /** md +rules + type_Seq : Seq -> TYPE +``` + **/ + + type_LineSeq : LineSeq -> TYPE + + programOk(tl@TopLevel(seq)) :- {T} + type_LineSeq(seq) == T, + @tl.type := T. + + /** md +Now to type a Seq, we look to the syntax, and see that there are two +possibilities for what it might be: just an Ex, or a Sequence(_) of a +list of 'Ex's. For the first, Statix does not allow one sort to simply +"become" another, but the Spoofax infrastructure automatically inserts +"injection" constructors for us, in this case one named Ex2Seq. So the +first rule for `type_Seq` is straightforward: + +```statix + type_Seq(s@Ex2Seq(e)) = T : - + type_Ex(e) == T, + @s.type := T. +``` +where of course type_Ex needs its own declaration analogous to the above. + **/ + + type_Line : Line -> TYPE + + type_LineSeq(ls@Line2LineSeq(l)) = T :- + type_Line(l) == T, + @ls.type := T. + + type_OptTermEx : OptTermEx -> TYPE + + type_Line(l@OptTermEx2Line(ote)) = T :- + type_OptTermEx(ote) == T, + @l.type := T. + + type_Ex : Ex -> TYPE + + type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- + type_Ex(e) == T, + @ote.type :=T. + + /** md + +This brings us to the syntax rules for the basic expressions themselves, +which comprise almost all of the remaining fostr language constructs. +But first a mechanism suggested by Ivo Wilms to avoid repeating the node +type annotation in every rule: +```statix + **/ + + /** md */ + ty_Ex : Ex -> TYPE + + type_Ex(e) = ty@ty_Ex(e) :- + @e.type := ty. + /* **/ + + /** md +``` +At this stage in fostr's development, there was no difference between a +terminated and unterminated expression, so the typing rule for that +constructor was trivial: +```statix + ty_Ex(Terminated(e)) = ty_Ex(e). +``` + **/ + + type_TermEx: TermEx -> TYPE + + type_TermEx(te@Terminate(e)) = T :- + type_Ex(e) == T, + @te.type := T. + + /** md + +Now typing literals is straightforward: +```statix + **/ + + /** md */ + ty_Ex(Int(_)) = INT(). + ty_Ex(LitString(_)) = STRING(). + ty_Ex(e@Stream()) = STREAM(). + /* **/ + + /** md +``` + +Finally we get to the binary operators, and here we use the pattern found in +recent versions of the +"[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)" +example language and in the Fall 2020 TU-Delft class lecture on +[Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/). +This pattern lets us specify error messages. + +```statix + **/ + + /** md */ + ty_Ex(Sum(e1, e2)) = INT() :- + type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1, + type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2. + + ty_Ex(Gets(e1, e2)) = STREAM() :- {T} + type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1, + type_Ex(e2) == T. + + ty_Ex(To(e1, e2)) = T :- + type_Ex(e1) == T, + type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. + /* **/ + + /** md +``` + +### Using type annotations in transformation + +_Probably want to include stuff from analysis.str/ haskell.str here_ + + **/ rules // multi-file entry point -- 2.34.1 From 804a00902a2ad6fea172378caa235acf701d0a0a Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Tue, 16 Feb 2021 09:46:12 -0800 Subject: [PATCH 4/6] feat: Type-dependent Haskell code generation Caveat: type is still not being assigned for the Sequence() constructor. Also fixes the parsing of literal strings (whitespace just after the opening quote was being ignored, and was ambiguous just before the opening quote). --- README.md | 3 +- syntax/fostr.sdf3 | 4 +-- tests/basic.spt | 6 ++-- trans/haskell.str | 74 ++++++++++++++++++++++++++++++-------------- trans/javascript.str | 6 ++-- trans/python.str | 2 +- 6 files changed, 62 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 91e1605..0df4cba 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ language as possible to work in, given that I inevitably will be doing a bunch of coding. The language will be centrally organized around the concept of "streams" (somewhat in the spirit of [streem](https://github.com/matz/streem) and/or -[Orc](http://orc.csres.utexas.edu/index.shtml)). In fact all higher-type +[Orc](http://orc.csres.utexas.edu/index.shtml), or to a lesser extent, +[Sisal-is](https://github.com/parsifal-47/sisal-is)). In fact all higher-type entities will be cast in terms of streams, or in slogan form, "++f++unctions and (binary) ++o++perators are ++str++eams" (hence the name "fostr"). diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 7805b3f..4dc4569 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -14,7 +14,7 @@ lexical sorts lexical syntax - STRING_LITERAL = ~[\']* + STRING_LITERAL = "'"~[\']*"'" context-free sorts @@ -38,7 +38,7 @@ context-free syntax TermEx.Terminate = <;> Ex.Int = INT - Ex.LitString = <''> + Ex.LitString = STRING_LITERAL Ex.Stream = Ex.Sum = < + > {left} Ex.Gets = [[Ex] << [Ex]] {left} diff --git a/tests/basic.spt b/tests/basic.spt index 075a4c3..e1cadf9 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,15 +1,15 @@ module basic language fostr - test hw1_type [[ -[[stream]] << [['Hello, world!']] +[[stream]] << [['Hello, world! ']] << [[3+2]] << ' times.' ]] run get-type on #1 to STREAM() run get-type on #2 to STRING() +run get-type on #3 to INT() run get-type to STREAM() /** writes -Hello, world!**/ +Hello, world! 5 times.**/ /** md Title: A whirlwind tour of fostr diff --git a/trans/haskell.str b/trans/haskell.str index 63c969e..a22fe36 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,15 +1,25 @@ module haskell -imports libstrategolib signatures/- util +imports libstrategolib signatures/- util analysis rules - /* Approach: Generate code from the bottom up. - At every node, we create a pair of the implementation and - necessary preamble of IO actions. - We concatenate preambles as we go up. - Finally, at the toplevel we emit the preamble before returning the - final value. + /* Approach: + A) We will define a local transformation taking a term with value strings + at each child to a value string for the node. + B) We will append IO actions needed to set up for the value progressively + to a Preactions rule (mapping () to the list of actions). There will + be a utility `add-preaction` to append a new clause to value of this + rule. + C) We will use bottomup-para to traverse the full AST with the + transformation from A so that we have access to the original expression + (and get get the Statix-associated type when we need to). + Hence the transformation in (A) must actually take a pair of + an (original) term and a term with value strings at each child, + and be certain to return a value string. + + Finally, at the toplevel we emit the result of () before + returning the final value. */ - hs: TopLevel((c,p)) -> $[import System.IO + hs: (_, TopLevel(val)) -> $[import System.IO data IOStream = StdIO gets :: Show b => a -> b -> IO a @@ -17,25 +27,35 @@ rules putStr(show d) return s + getsStr :: a -> String -> IO a + getsStr s d = do + putStr(d) + return s + main = do - [p]return [c]] + [()]return [val]] - hs: Stream() -> ("StdIO", "") - hs: Int(x) -> (x, "") - hs: LitString(x) - -> ($["[x]"], "") - hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) + hs: (_, Stream()) -> "StdIO" + hs: (_, Int(x)) -> x + hs: (_, LitString(x)) -> x + hs: (_, Sum(x, y)) -> $[([x] + [y])] - hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") - hsget: (s, x, p, v) -> (v, [p, $[[v] <- [s] `gets` [x]], - "\n"]) + hs: (Gets(_, xn), Gets(s, x)) -> v + with v := "_fostr_get" + ; [$[[v] <- [(s, xn, x)]]] + hs: (To(xn, _), To(x, s)) -> v + with v := "_fostr_to" + ; [$[let [v] = [x]], (s, xn, v)] - hs: To( (c, p), (d, q)) -> (c,d,(p,q),"fosto") - hsto: (x, s, p, v) -> (v, [p, $[let [v] = [x]], "\n", - $[[s] `gets` [v]], "\n"]) + hs_gets: (s, xn, x ) -> $[[s] [xn] [x]] + hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`") - hs: Terminate((c,p)) -> ($[[c];;], p) - hs: Sequence(l) -> (l, l) + hs: (_, Terminate(x)) -> $[[x];;] + hs: (_, Sequence(l)) -> l + /* One drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + hs: (x, x) -> x where x /* Characters we need to escape in Haskell string constants */ Hascape: ['\t' | cs ] -> ['\', 't' | cs ] @@ -47,9 +67,15 @@ rules Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies - HaskellEscape = Escape <+ Hascape + haskLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Hascape)) + ; double-quote - haskell = bottomup(try(hs)) + haskell = rules(Preactions: () -> ""); bottomup-para(try(hs)) + + /* See "Approach" at top of file */ + add-preactions = newp := ((), ) + ; rules(Preactions: () -> newp) // Interface haskell code generation with editor services and file system to-haskell: (selected, _, _, path, project-path) -> (filename, result) diff --git a/trans/javascript.str b/trans/javascript.str index d0d97fe..645cc63 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -13,7 +13,7 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x - js: LitString(x) -> $['[x]'] + js: LitString(x) -> x js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] @@ -29,7 +29,9 @@ rules Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies - JavaEscape = Escape <+ Jscape + javaLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Jscape)) + ; single-quote javascript = bottomup(try(js)) diff --git a/trans/python.str b/trans/python.str index d745cee..7aa4e06 100644 --- a/trans/python.str +++ b/trans/python.str @@ -15,7 +15,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x - py: LitString(x) -> $[r'[x]'] + py: LitString(x) -> $[r[x]] py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] -- 2.34.1 From f93499acfd0c8328950aa0416be142f5659d7c97 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Wed, 17 Feb 2021 11:20:26 -0800 Subject: [PATCH 5/6] feat: Add typing for Sequence() operation Also move the signature of the semantic sort TYPE into its own file to facilitate sharing between Statix and Stratego. (Currently it is shared via symbolic link, but that may cause problems down the line; if/when it does, will have to look at physically copying the file into src-gen via an "Additional build step" using either Stratego or Ant. Also documents using Statix types from Stratego. --- signature/TYPE.str | 1 + signature/TYPE.stx | 7 ++++ statics/util.stx | 7 ++++ tests/emit_thrice.fos | 2 +- trans/analysis.str | 10 ------ trans/haskell.str | 10 ++++-- trans/statics.stx | 74 ++++++++++++++++++++++++++++++++++++------- 7 files changed, 86 insertions(+), 25 deletions(-) create mode 120000 signature/TYPE.str create mode 100644 signature/TYPE.stx create mode 100644 statics/util.stx diff --git a/signature/TYPE.str b/signature/TYPE.str new file mode 120000 index 0000000..332d8ef --- /dev/null +++ b/signature/TYPE.str @@ -0,0 +1 @@ +TYPE.stx \ No newline at end of file diff --git a/signature/TYPE.stx b/signature/TYPE.stx new file mode 100644 index 0000000..e299f8b --- /dev/null +++ b/signature/TYPE.stx @@ -0,0 +1,7 @@ +module signature/TYPE +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE diff --git a/statics/util.stx b/statics/util.stx new file mode 100644 index 0000000..7ce94e7 --- /dev/null +++ b/statics/util.stx @@ -0,0 +1,7 @@ +module statics/util +imports signature/TYPE + +rules + lastTYPE : list(TYPE) -> TYPE + lastTYPE([T]) = T. + lastTYPE([U | TS]) = lastTYPE(TS). diff --git a/tests/emit_thrice.fos b/tests/emit_thrice.fos index 6042aad..232e2be 100644 --- a/tests/emit_thrice.fos +++ b/tests/emit_thrice.fos @@ -1,4 +1,4 @@ - stream << 72 + 87 + stream << 'Some numbers: ' stream << 88 + 96 99 + 12 >> diff --git a/trans/analysis.str b/trans/analysis.str index 858d1ec..e0c22ef 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,14 +1,4 @@ module analysis - -signature - sorts - TYPE - - constructors - INT : TYPE - STRING : TYPE - STREAM : TYPE - imports statixruntime diff --git a/trans/haskell.str b/trans/haskell.str index a22fe36..560b29c 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,5 +1,5 @@ module haskell -imports libstrategolib signatures/- util analysis +imports libstrategolib signatures/- signature/TYPE util analysis rules /* Approach: A) We will define a local transformation taking a term with value strings @@ -52,7 +52,13 @@ rules hs: (_, Terminate(x)) -> $[[x];;] hs: (_, Sequence(l)) -> l - /* One drawback of using paramorphism is at the very leaves we have + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + hs: (_, []) -> [] + hs: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have to undouble the tuple: */ hs: (x, x) -> x where x diff --git a/trans/statics.stx b/trans/statics.stx index 0099855..b6a7d83 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -1,6 +1,8 @@ module statics imports signatures/fostr-sig +imports signature/TYPE +imports statics/util /** md Title: Adding Program Analysis with Statix @@ -36,7 +38,10 @@ Then I reached the point at which the grammar was basically just ``` (The first four clauses are in comments because they approximate fostr's grammar; it actually uses a few more sorts for sequences of -expressions, to achieve fostr's exact layout rules.) +expressions, to achieve fostr's exact layout rules. Also note that the parsing +of literal strings later evolved to include the surrounding single quotes, +because the rule above implicitly allows layout between the quotes and the +string contents, creating ambiguity.) This was the first point at which there were two different types that might need to be written to standard output (Int and String), and although of course @@ -47,7 +52,7 @@ that point since I knew it would be hopeless without statically typing fostr programs). So it was time to bite the bullet and add type checking via Statix to fostr. -The first step is to replace the simple assertion that any TopLevel +The first step was to replace the simple assertion that any TopLevel is OK with a constraint that its Seq must type properly, and an assignment of that type to the top level node: ```statix @@ -57,17 +62,9 @@ programOk(tl@TopLevel(seq)) :- {T} ``` Of course, for this to even parse, we must have a definition of `type_Seq`: ```statix +{! ../signature/TYPE.stx extract: {start: module, stop: rules} !} **/ -/** md */ -signature - sorts TYPE // semantic type - constructors - INT : TYPE - STRING : TYPE - STREAM : TYPE -/* **/ - // see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point @@ -108,6 +105,34 @@ where of course type_Ex needs its own declaration analogous to the above. type_Line(l) == T, @ls.type := T. + /** md + +The other (and in fact more typical) rule for `type_Seq`, when it actually +consists of a sequence of expressions, is a bit more involved. Fortunately +Statix provides a primitive for mapping over a list, so we can proceed as +follows: +```statix + types_Exs maps type_Ex(list(*)) = list(*) + type_Seq(s@Sequence(l)) = T :- {lt} + types_Exs(l) == lt, + lastTYPE(lt) == T, + @s.type := T. +``` +Here `lastTYPE` is a function that extracts the last TYPE from a list. +Unless/until Statix develops some sort of standard library, it must be +hand-defined, as done in "statics/util.stx" like so: +```statix +{! ../statics/util.stx extract: {start: lastTYPE} !} +``` + **/ + + types_Lines maps type_Line(list(*)) = list(*) + + type_LineSeq(ls@Sequence(l)) = T :- {lt} + types_Lines(l) == lt, + lastTYPE(lt) == T, + @ls.type := T. + type_OptTermEx : OptTermEx -> TYPE type_Line(l@OptTermEx2Line(ote)) = T :- @@ -196,8 +221,33 @@ This pattern lets us specify error messages. ### Using type annotations in transformation -_Probably want to include stuff from analysis.str/ haskell.str here_ +At this point, Statix properly types all of the valid programs of the very +rudimentary language defined by the grammar above. But the proximate purpose +for implementing this typing was to aid Haskell code generation. So how +do we actually use the assigned types in a Stratego transformation? +Statix provides a Stratego api that includes, among other items, strategies +`stx-get-ast-analysis` and `stx-get-ast-type(|analysis)` that provide access +to the assigned types. However, it's easiest to use the information via +a wrapper like this, essentially lifted from the "chicago" language project: +```stratego +{! analysis.str extract: + start: Extract.the.type +terminate: Prints.the.analyzed.type +!} +``` + +Now `get_type` run on a node of the analyzed AST produces the assigned `TYPE` +(as an ATerm in the constructors of sort TYPE in Statix). + +Thus, you can select on the assigned type, as in the strategy to select +the correct Haskell operator to use to send an item to standard output: +```stratego +{! haskell.str extract: + start: '(.*hs_getOp.=.*)' + stop: \s +!} +``` **/ rules // multi-file entry point -- 2.34.1 From 2514f0df9804c4aa7e86c2958de79fec74d75c61 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Wed, 17 Feb 2021 19:47:29 -0800 Subject: [PATCH 6/6] fix: handle the TermEx->OptTermEx injection in Statix --- trans/statics.stx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/trans/statics.stx b/trans/statics.stx index b6a7d83..ef63c8d 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -139,11 +139,16 @@ hand-defined, as done in "statics/util.stx" like so: type_OptTermEx(ote) == T, @l.type := T. - type_Ex : Ex -> TYPE + type_Ex : Ex -> TYPE + type_TermEx : TermEx -> TYPE type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- type_Ex(e) == T, - @ote.type :=T. + @ote.type := T. + + type_OptTermEx(ote@TermEx2OptTermEx(te)) = T :- + type_TermEx(te) == T, + @ote.type := T. /** md @@ -171,8 +176,6 @@ constructor was trivial: ``` **/ - type_TermEx: TermEx -> TYPE - type_TermEx(te@Terminate(e)) = T :- type_Ex(e) == T, @te.type := T. -- 2.34.1