From 904f651897c90a864d35cfc724ad3787c7d87357 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Thu, 11 Feb 2021 10:02:43 -0800 Subject: [PATCH 01/12] feat: Add syntax for string literals Also rudimentary code generation. The difficulty is that for Haskell generation, we need to know whether an expression is a string or an int to send it to standard output. So we will need to begin implementation of a type system for fostr. --- syntax/fostr.sdf3 | 21 +++++++++++++++------ tests/hw.fos | 1 + trans/haskell.str | 12 ++++++++++++ trans/javascript.str | 10 ++++++++++ trans/python.str | 1 + 5 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 tests/hw.fos diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 4a39131..7805b3f 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -8,6 +8,14 @@ context-free start-symbols Start +lexical sorts + + STRING_LITERAL + +lexical syntax + + STRING_LITERAL = ~[\']* + context-free sorts Start LineSeq Line OptTermEx TermExLst TermEx Ex @@ -29,13 +37,14 @@ context-free syntax TermEx.Terminate = <;> - Ex.Int = INT - Ex.Stream = - Ex.Sum = [[Ex] + [Ex]] {left} - Ex.Gets = [[Ex] << [Ex]] {left} - Ex.To = [[Ex] >> [Ex]] {left} + Ex.Int = INT + Ex.LitString = <''> + Ex.Stream = + Ex.Sum = < + > {left} + Ex.Gets = [[Ex] << [Ex]] {left} + Ex.To = [[Ex] >> [Ex]] {left} - Ex = <()> {bracket} + Ex = <()> {bracket} context-free priorities diff --git a/tests/hw.fos b/tests/hw.fos new file mode 100644 index 0000000..90fc0d4 --- /dev/null +++ b/tests/hw.fos @@ -0,0 +1 @@ +stream << 'Hello, world!' 
diff --git a/trans/haskell.str b/trans/haskell.str index eae335e..63c969e 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -22,6 +22,8 @@ rules hs: Stream() -> ("StdIO", "") hs: Int(x) -> (x, "") + hs: LitString(x) + -> ($["[x]"], "") hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") @@ -35,7 +37,17 @@ rules hs: Terminate((c,p)) -> ($[[c];;], p) hs: Sequence(l) -> (l, l) + /* Characters we need to escape in Haskell string constants */ + Hascape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... */ + Hascape: [ 0 | cs ] -> ['\', '0' | cs ] + Hascape: [ 7 | cs ] -> ['\', 'a' | cs ] // Alert + Hascape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Hascape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + HaskellEscape = Escape <+ Hascape haskell = bottomup(try(hs)) diff --git a/trans/javascript.str b/trans/javascript.str index 605b46a..d0d97fe 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -13,13 +13,23 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x + js: LitString(x) -> $['[x]'] js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] js: Terminate(x) -> x js: Sequence(l) -> l + /* Characters we need to escape in Javascript string constants */ + Jscape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... 
*/ + Jscape: [ 0 | cs ] -> ['\', '0' | cs ] + Jscape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Jscape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + JavaEscape = Escape <+ Jscape javascript = bottomup(try(js)) diff --git a/trans/python.str b/trans/python.str index 238c006..d745cee 100644 --- a/trans/python.str +++ b/trans/python.str @@ -15,6 +15,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x + py: LitString(x) -> $[r'[x]'] py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] From b3f9cdf372c7cf5a3bbc8590d608ce7e849c3c4b Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Fri, 12 Feb 2021 17:08:55 -0800 Subject: [PATCH 02/12] docs: Add statics development documentation Hopefully the new section will be helpful, given that it can be a bit confusing to get started with Statix. With this commit, the background is established and the stage is set to dive into type checking. 
--- mkdocs.yml | 1 + trans/statics.stx | 48 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 6726b34..e316a98 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,6 +2,7 @@ site_name: fostr language nav: - README.md - tests/basic.md +- trans/statics.md - implementation.md plugins: diff --git a/trans/statics.stx b/trans/statics.stx index 7772698..fa795fe 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -2,7 +2,53 @@ module statics imports signatures/fostr-sig -// see docs/implementation.md for details on how to switch to multi-file analysis +/** md +Title: Adding Program Analysis with Statix + +## Development of fostr static analysis + +This section is more documentation of Spoofax in general and Statix +in particular than of fostr itself, but is being maintained here in case +it could be either helpful to someone getting started with Statix or +helpful in understanding how the static characteristics of fostr were designed. + +As mentioned in the [Overview](../README.md), I don't like to program and a +corollary of that is never to use a facility unless/until there's a need for +it. So the first few rudimentary passes at fostr simply declared every program +to be "OK" from the point of view of Statix: +```statix +{! "\git docs/statix_start:trans/statics.stx" extract: + start: programOk + stop: (.*TopLevel.*) +!} +``` + +Then I reached the point at which the grammar was basically just +```SDF3 +// Start.TopLevel = +// Ex.Sequence = sq:Ex+ {layout: align-list sq} +// Ex.Terminated = <;> +{! "\git docs/statix_start:syntax/fostr.sdf3" extract: + start: TermEx.Terminate + stop: (.*bracket.*) +!} +``` +(The first three clauses are in comments because they approximate fostr's +grammar; it actually uses a few more sorts for sequences of +expressions, to acheive fostr's exact layout rules.) 
+ +This was the first point at which there were two different types that might +need to be written to standard output (Int and String), and although of course +the dynamically-typed Python and Javascript code generated dealt with both fine, +the Haskell code needed to differ depending on the +type of the item written (and I hadn't even started OCaml code generation at +that point since I knew it would be hopeless without statically typing fostr +programs). + +So it was time to bite the bullet and add type checking via Statix to fostr. +**/ + +// see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point From 5cd75b817748695db47624f31f9f14b5503a9d55 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Sun, 14 Feb 2021 10:25:23 -0800 Subject: [PATCH 03/12] feat: Initial statix assignment of types to expressions --- bin/extract_tests.xsh | 4 +- editor/Analysis.esv | 1 + tests/basic.spt | 10 +++ trans/analysis.str | 24 +++++++ trans/statics.stx | 154 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 186 insertions(+), 7 deletions(-) diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 79b2ca8..7cf131a 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -27,9 +27,9 @@ for path in TEST_LIST: if pfm: continue # skip examples that don't parse ntfm = re.search(r'\n\s*\]\].*?don.t.test', details) if ntfm: continue # explicit skip - em = re.search(r'\n\s*\]\]', details) + em = re.search(r'\n\]\]', details) if not em: continue - example = details[:em.start()+1] + example = details[:em.start()+1].replace('[[','').replace(']]','') expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) diff --git a/editor/Analysis.esv b/editor/Analysis.esv index 0667f1e..eb3197f 100644 --- a/editor/Analysis.esv +++ b/editor/Analysis.esv @@ -22,3 +22,4 @@ menus action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source) action: "Show analyzed AST" = debug-show-analyzed + action: 
"Show analyzed type" = debug-show-type diff --git a/tests/basic.spt b/tests/basic.spt index e3f4f69..075a4c3 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,6 +1,16 @@ module basic language fostr + +test hw1_type [[ +[[stream]] << [['Hello, world!']] +]] +run get-type on #1 to STREAM() +run get-type on #2 to STRING() +run get-type to STREAM() +/** writes +Hello, world!**/ + /** md Title: A whirlwind tour of fostr diff --git a/trans/analysis.str b/trans/analysis.str index 70919cc..858d1ec 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,5 +1,14 @@ module analysis +signature + sorts + TYPE + + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE + imports statixruntime @@ -51,3 +60,18 @@ rules // Debugging debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result) with filename := path ; result := sel + + // Extract the type assigned to a node by Statix + get-type: node -> type + where + // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error) + a := node]])>; + // Gets the type of the given node (or throws an error) + type := node]])> node + + fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail + + // Prints the analyzed type of a selection. + debug-show-type: (sel, _, _, path, projp) -> (filename, result) + with filename := path + ; result := sel diff --git a/trans/statics.stx b/trans/statics.stx index fa795fe..0099855 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -25,17 +25,18 @@ to be "OK" from the point of view of Statix: Then I reached the point at which the grammar was basically just ```SDF3 -// Start.TopLevel = -// Ex.Sequence = sq:Ex+ {layout: align-list sq} +// Start.TopLevel = +// Seq = +// Seq.Sequence = sq:Ex+ {layout(align-list sq)} // Ex.Terminated = <;> {! 
"\git docs/statix_start:syntax/fostr.sdf3" extract: start: TermEx.Terminate stop: (.*bracket.*) !} ``` -(The first three clauses are in comments because they approximate fostr's +(The first four clauses are in comments because they approximate fostr's grammar; it actually uses a few more sorts for sequences of -expressions, to acheive fostr's exact layout rules.) +expressions, to achieve fostr's exact layout rules.) This was the first point at which there were two different types that might need to be written to standard output (Int and String), and although of course @@ -46,15 +47,158 @@ that point since I knew it would be hopeless without statically typing fostr programs). So it was time to bite the bullet and add type checking via Statix to fostr. +The first step is to replace the simple assertion that any TopLevel +is OK with a constraint that its Seq must type properly, and an assignment of +that type to the top level node: +```statix +programOk(tl@TopLevel(seq)) :- {T} + type_Seq(seq) == T, + @tl.type := T. +``` +Of course, for this to even parse, we must have a definition of `type_Seq`: +```statix **/ +/** md */ +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE +/* **/ + // see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point programOk : Start - programOk(TopLevel(_)). + /** md +rules + type_Seq : Seq -> TYPE +``` + **/ + + type_LineSeq : LineSeq -> TYPE + + programOk(tl@TopLevel(seq)) :- {T} + type_LineSeq(seq) == T, + @tl.type := T. + + /** md +Now to type a Seq, we look to the syntax, and see that there are two +possibilities for what it might be: just an Ex, or a Sequence(_) of a +list of 'Ex's. For the first, Statix does not allow one sort to simply +"become" another, but the Spoofax infrastructure automatically inserts +"injection" constructors for us, in this case one named Ex2Seq. 
So the +first rule for `type_Seq` is straightforward: + +```statix + type_Seq(s@Ex2Seq(e)) = T : - + type_Ex(e) == T, + @s.type := T. +``` +where of course type_Ex needs its own declaration analogous to the above. + **/ + + type_Line : Line -> TYPE + + type_LineSeq(ls@Line2LineSeq(l)) = T :- + type_Line(l) == T, + @ls.type := T. + + type_OptTermEx : OptTermEx -> TYPE + + type_Line(l@OptTermEx2Line(ote)) = T :- + type_OptTermEx(ote) == T, + @l.type := T. + + type_Ex : Ex -> TYPE + + type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- + type_Ex(e) == T, + @ote.type :=T. + + /** md + +This brings us to the syntax rules for the basic expressions themselves, +which comprise almost all of the remaining fostr language constructs. +But first a mechanism suggested by Ivo Wilms to avoid repeating the node +type annotation in every rule: +```statix + **/ + + /** md */ + ty_Ex : Ex -> TYPE + + type_Ex(e) = ty@ty_Ex(e) :- + @e.type := ty. + /* **/ + + /** md +``` +At this stage in fostr's development, there was no difference between a +terminated and unterminated expression, so the typing rule for that +constructor was trivial: +```statix + ty_Ex(Terminated(e)) = ty_Ex(e). +``` + **/ + + type_TermEx: TermEx -> TYPE + + type_TermEx(te@Terminate(e)) = T :- + type_Ex(e) == T, + @te.type := T. + + /** md + +Now typing literals is straightforward: +```statix + **/ + + /** md */ + ty_Ex(Int(_)) = INT(). + ty_Ex(LitString(_)) = STRING(). + ty_Ex(e@Stream()) = STREAM(). + /* **/ + + /** md +``` + +Finally we get to the binary operators, and here we use the pattern found in +recent versions of the +"[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)" +example language and in the Fall 2020 TU-Delft class lecture on +[Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/). +This pattern lets us specify error messages. 
+ +```statix + **/ + + /** md */ + ty_Ex(Sum(e1, e2)) = INT() :- + type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1, + type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2. + + ty_Ex(Gets(e1, e2)) = STREAM() :- {T} + type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1, + type_Ex(e2) == T. + + ty_Ex(To(e1, e2)) = T :- + type_Ex(e1) == T, + type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. + /* **/ + + /** md +``` + +### Using type annotations in transformation + +_Probably want to include stuff from analysis.str/ haskell.str here_ + + **/ rules // multi-file entry point From 804a00902a2ad6fea172378caa235acf701d0a0a Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Tue, 16 Feb 2021 09:46:12 -0800 Subject: [PATCH 04/12] feat: Type-dependent Haskell code generation Caveat: type is still not being assigned for the Sequence() constructor. Also fixes the parsing of literal strings (whitespace just after the opening quote was being ignored, and was ambiguous just before the opening quote). --- README.md | 3 +- syntax/fostr.sdf3 | 4 +-- tests/basic.spt | 6 ++-- trans/haskell.str | 74 ++++++++++++++++++++++++++++++-------------- trans/javascript.str | 6 ++-- trans/python.str | 2 +- 6 files changed, 62 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 91e1605..0df4cba 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ language as possible to work in, given that I inevitably will be doing a bunch of coding. The language will be centrally organized around the concept of "streams" (somewhat in the spirit of [streem](https://github.com/matz/streem) and/or -[Orc](http://orc.csres.utexas.edu/index.shtml)). In fact all higher-type +[Orc](http://orc.csres.utexas.edu/index.shtml), or to a lesser extent, +[Sisal-is](https://github.com/parsifal-47/sisal-is)). 
In fact all higher-type entities will be cast in terms of streams, or in slogan form, "++f++unctions and (binary) ++o++perators are ++str++eams" (hence the name "fostr"). diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 7805b3f..4dc4569 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -14,7 +14,7 @@ lexical sorts lexical syntax - STRING_LITERAL = ~[\']* + STRING_LITERAL = "'"~[\']*"'" context-free sorts @@ -38,7 +38,7 @@ context-free syntax TermEx.Terminate = <;> Ex.Int = INT - Ex.LitString = <''> + Ex.LitString = STRING_LITERAL Ex.Stream = Ex.Sum = < + > {left} Ex.Gets = [[Ex] << [Ex]] {left} diff --git a/tests/basic.spt b/tests/basic.spt index 075a4c3..e1cadf9 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,15 +1,15 @@ module basic language fostr - test hw1_type [[ -[[stream]] << [['Hello, world!']] +[[stream]] << [['Hello, world! ']] << [[3+2]] << ' times.' ]] run get-type on #1 to STREAM() run get-type on #2 to STRING() +run get-type on #3 to INT() run get-type to STREAM() /** writes -Hello, world!**/ +Hello, world! 5 times.**/ /** md Title: A whirlwind tour of fostr diff --git a/trans/haskell.str b/trans/haskell.str index 63c969e..a22fe36 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,15 +1,25 @@ module haskell -imports libstrategolib signatures/- util +imports libstrategolib signatures/- util analysis rules - /* Approach: Generate code from the bottom up. - At every node, we create a pair of the implementation and - necessary preamble of IO actions. - We concatenate preambles as we go up. - Finally, at the toplevel we emit the preamble before returning the - final value. + /* Approach: + A) We will define a local transformation taking a term with value strings + at each child to a value string for the node. + B) We will append IO actions needed to set up for the value progressively + to a Preactions rule (mapping () to the list of actions). 
There will + be a utility `add-preaction` to append a new clause to value of this + rule. + C) We will use bottomup-para to traverse the full AST with the + transformation from A so that we have access to the original expression + (and get get the Statix-associated type when we need to). + Hence the transformation in (A) must actually take a pair of + an (original) term and a term with value strings at each child, + and be certain to return a value string. + + Finally, at the toplevel we emit the result of () before + returning the final value. */ - hs: TopLevel((c,p)) -> $[import System.IO + hs: (_, TopLevel(val)) -> $[import System.IO data IOStream = StdIO gets :: Show b => a -> b -> IO a @@ -17,25 +27,35 @@ rules putStr(show d) return s + getsStr :: a -> String -> IO a + getsStr s d = do + putStr(d) + return s + main = do - [p]return [c]] + [()]return [val]] - hs: Stream() -> ("StdIO", "") - hs: Int(x) -> (x, "") - hs: LitString(x) - -> ($["[x]"], "") - hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) + hs: (_, Stream()) -> "StdIO" + hs: (_, Int(x)) -> x + hs: (_, LitString(x)) -> x + hs: (_, Sum(x, y)) -> $[([x] + [y])] - hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") - hsget: (s, x, p, v) -> (v, [p, $[[v] <- [s] `gets` [x]], - "\n"]) + hs: (Gets(_, xn), Gets(s, x)) -> v + with v := "_fostr_get" + ; [$[[v] <- [(s, xn, x)]]] + hs: (To(xn, _), To(x, s)) -> v + with v := "_fostr_to" + ; [$[let [v] = [x]], (s, xn, v)] - hs: To( (c, p), (d, q)) -> (c,d,(p,q),"fosto") - hsto: (x, s, p, v) -> (v, [p, $[let [v] = [x]], "\n", - $[[s] `gets` [v]], "\n"]) + hs_gets: (s, xn, x ) -> $[[s] [xn] [x]] + hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`") - hs: Terminate((c,p)) -> ($[[c];;], p) - hs: Sequence(l) -> (l, l) + hs: (_, Terminate(x)) -> $[[x];;] + hs: (_, Sequence(l)) -> l + /* One drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + hs: (x, x) -> x where x /* Characters we need to escape in Haskell string 
constants */ Hascape: ['\t' | cs ] -> ['\', 't' | cs ] @@ -47,9 +67,15 @@ rules Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies - HaskellEscape = Escape <+ Hascape + haskLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Hascape)) + ; double-quote - haskell = bottomup(try(hs)) + haskell = rules(Preactions: () -> ""); bottomup-para(try(hs)) + + /* See "Approach" at top of file */ + add-preactions = newp := ((), ) + ; rules(Preactions: () -> newp) // Interface haskell code generation with editor services and file system to-haskell: (selected, _, _, path, project-path) -> (filename, result) diff --git a/trans/javascript.str b/trans/javascript.str index d0d97fe..645cc63 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -13,7 +13,7 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x - js: LitString(x) -> $['[x]'] + js: LitString(x) -> x js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] @@ -29,7 +29,9 @@ rules Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies - JavaEscape = Escape <+ Jscape + javaLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Jscape)) + ; single-quote javascript = bottomup(try(js)) diff --git a/trans/python.str b/trans/python.str index d745cee..7aa4e06 100644 --- a/trans/python.str +++ b/trans/python.str @@ -15,7 +15,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x - py: LitString(x) -> $[r'[x]'] + py: LitString(x) -> $[r[x]] py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] From f93499acfd0c8328950aa0416be142f5659d7c97 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Wed, 17 Feb 2021 11:20:26 -0800 Subject: [PATCH 05/12] feat: Add typing for Sequence() operation Also move the signature of the semantic sort TYPE into its own file to facilitate sharing between Statix and Stratego. 
(Currently it is shared via symbolic link, but that may cause problems down the line; if/when it does, will have to look at physically copying the file into src-gen via an "Additional build step" using either Stratego or Ant.) Also documents using Statix types from Stratego. --- signature/TYPE.str | 1 + signature/TYPE.stx | 7 ++++ statics/util.stx | 7 ++++ tests/emit_thrice.fos | 2 +- trans/analysis.str | 10 ------ trans/haskell.str | 10 ++++-- trans/statics.stx | 74 ++++++++++++++++++++++++++++++++++++------- 7 files changed, 86 insertions(+), 25 deletions(-) create mode 120000 signature/TYPE.str create mode 100644 signature/TYPE.stx create mode 100644 statics/util.stx diff --git a/signature/TYPE.str b/signature/TYPE.str new file mode 120000 index 0000000..332d8ef --- /dev/null +++ b/signature/TYPE.str @@ -0,0 +1 @@ +TYPE.stx \ No newline at end of file diff --git a/signature/TYPE.stx b/signature/TYPE.stx new file mode 100644 index 0000000..e299f8b --- /dev/null +++ b/signature/TYPE.stx @@ -0,0 +1,7 @@ +module signature/TYPE +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE diff --git a/statics/util.stx b/statics/util.stx new file mode 100644 index 0000000..7ce94e7 --- /dev/null +++ b/statics/util.stx @@ -0,0 +1,7 @@ +module statics/util +imports signature/TYPE + +rules + lastTYPE : list(TYPE) -> TYPE + lastTYPE([T]) = T. + lastTYPE([U | TS]) = lastTYPE(TS). 
diff --git a/tests/emit_thrice.fos b/tests/emit_thrice.fos index 6042aad..232e2be 100644 --- a/tests/emit_thrice.fos +++ b/tests/emit_thrice.fos @@ -1,4 +1,4 @@ - stream << 72 + 87 + stream << 'Some numbers: ' stream << 88 + 96 99 + 12 >> diff --git a/trans/analysis.str b/trans/analysis.str index 858d1ec..e0c22ef 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,14 +1,4 @@ module analysis - -signature - sorts - TYPE - - constructors - INT : TYPE - STRING : TYPE - STREAM : TYPE - imports statixruntime diff --git a/trans/haskell.str b/trans/haskell.str index a22fe36..560b29c 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,5 +1,5 @@ module haskell -imports libstrategolib signatures/- util analysis +imports libstrategolib signatures/- signature/TYPE util analysis rules /* Approach: A) We will define a local transformation taking a term with value strings @@ -52,7 +52,13 @@ rules hs: (_, Terminate(x)) -> $[[x];;] hs: (_, Sequence(l)) -> l - /* One drawback of using paramorphism is at the very leaves we have + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + hs: (_, []) -> [] + hs: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have to undouble the tuple: */ hs: (x, x) -> x where x diff --git a/trans/statics.stx b/trans/statics.stx index 0099855..b6a7d83 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -1,6 +1,8 @@ module statics imports signatures/fostr-sig +imports signature/TYPE +imports statics/util /** md Title: Adding Program Analysis with Statix @@ -36,7 +38,10 @@ Then I reached the point at which the grammar was basically just ``` (The first four clauses are in comments because they approximate fostr's grammar; it actually uses a few more sorts for sequences of -expressions, to achieve fostr's exact layout rules.) +expressions, to achieve fostr's exact layout rules. 
Also note that the parsing +of literal strings later evolved to include the surrounding single quotes, +because the rule above implicitly allows layout between the quotes and the +string contents, creating ambiguity.) This was the first point at which there were two different types that might need to be written to standard output (Int and String), and although of course @@ -47,7 +52,7 @@ that point since I knew it would be hopeless without statically typing fostr programs). So it was time to bite the bullet and add type checking via Statix to fostr. -The first step is to replace the simple assertion that any TopLevel +The first step was to replace the simple assertion that any TopLevel is OK with a constraint that its Seq must type properly, and an assignment of that type to the top level node: ```statix @@ -57,17 +62,9 @@ programOk(tl@TopLevel(seq)) :- {T} ``` Of course, for this to even parse, we must have a definition of `type_Seq`: ```statix +{! ../signature/TYPE.stx extract: {start: module, stop: rules} !} **/ -/** md */ -signature - sorts TYPE // semantic type - constructors - INT : TYPE - STRING : TYPE - STREAM : TYPE -/* **/ - // see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point @@ -108,6 +105,34 @@ where of course type_Ex needs its own declaration analogous to the above. type_Line(l) == T, @ls.type := T. + /** md + +The other (and in fact more typical) rule for `type_Seq`, when it actually +consists of a sequence of expressions, is a bit more involved. Fortunately +Statix provides a primitive for mapping over a list, so we can proceed as +follows: +```statix + types_Exs maps type_Ex(list(*)) = list(*) + type_Seq(s@Sequence(l)) = T :- {lt} + types_Exs(l) == lt, + lastTYPE(lt) == T, + @s.type := T. +``` +Here `lastTYPE` is a function that extracts the last TYPE from a list. 
+Unless/until Statix develops some sort of standard library, it must be +hand-defined, as done in "statics/util.stx" like so: +```statix +{! ../statics/util.stx extract: {start: lastTYPE} !} +``` + **/ + + types_Lines maps type_Line(list(*)) = list(*) + + type_LineSeq(ls@Sequence(l)) = T :- {lt} + types_Lines(l) == lt, + lastTYPE(lt) == T, + @ls.type := T. + type_OptTermEx : OptTermEx -> TYPE type_Line(l@OptTermEx2Line(ote)) = T :- @@ -196,8 +221,33 @@ This pattern lets us specify error messages. ### Using type annotations in transformation -_Probably want to include stuff from analysis.str/ haskell.str here_ +At this point, Statix properly types all of the valid programs of the very +rudimentary language defined by the grammar above. But the proximate purpose +for implementing this typing was to aid Haskell code generation. So how +do we actually use the assigned types in a Stratego transformation? +Statix provides a Stratego api that includes, among other items, strategies +`stx-get-ast-analysis` and `stx-get-ast-type(|analysis)` that provide access +to the assigned types. However, it's easiest to use the information via +a wrapper like this, essentially lifted from the "chicago" language project: +```stratego +{! analysis.str extract: + start: Extract.the.type +terminate: Prints.the.analyzed.type +!} +``` + +Now `get_type` run on a node of the analyzed AST produces the assigned `TYPE` +(as an ATerm in the constructors of sort TYPE in Statix). + +Thus, you can select on the assigned type, as in the strategy to select +the correct Haskell operator to use to send an item to standard output: +```stratego +{! 
haskell.str extract: + start: '(.*hs_getOp.=.*)' + stop: \s +!} +``` **/ rules // multi-file entry point From 2514f0df9804c4aa7e86c2958de79fec74d75c61 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Wed, 17 Feb 2021 19:47:29 -0800 Subject: [PATCH 06/12] fix: handle the TermEx->OptTermEx injection in Statix --- trans/statics.stx | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/trans/statics.stx b/trans/statics.stx index b6a7d83..ef63c8d 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -139,11 +139,16 @@ hand-defined, as done in "statics/util.stx" like so: type_OptTermEx(ote) == T, @l.type := T. - type_Ex : Ex -> TYPE + type_Ex : Ex -> TYPE + type_TermEx : TermEx -> TYPE type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- type_Ex(e) == T, - @ote.type :=T. + @ote.type := T. + + type_OptTermEx(ote@TermEx2OptTermEx(te)) = T :- + type_TermEx(te) == T, + @ote.type := T. /** md @@ -171,8 +176,6 @@ constructor was trivial: ``` **/ - type_TermEx: TermEx -> TYPE - type_TermEx(te@Terminate(e)) = T :- type_Ex(e) == T, @te.type := T. From 2772fd0c5ce56a685eeb069faaab5752dda075e5 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Thu, 18 Feb 2021 04:17:05 +0000 Subject: [PATCH 07/12] Add literal string constants (#19) In addition, for the sake of Haskell code generation, this PR adds static typing with Statix. Resolves #5. 
Co-authored-by: Glen Whitney Reviewed-on: https://code.studioinfinity.org/glen/fostr/pulls/19 Co-Authored-By: Glen Whitney Co-Committed-By: Glen Whitney --- README.md | 3 +- bin/extract_tests.xsh | 4 +- editor/Analysis.esv | 1 + mkdocs.yml | 1 + signature/TYPE.str | 1 + signature/TYPE.stx | 7 ++ statics/util.stx | 7 ++ syntax/fostr.sdf3 | 21 +++- tests/basic.spt | 10 ++ tests/emit_thrice.fos | 2 +- tests/hw.fos | 1 + trans/analysis.str | 16 ++- trans/haskell.str | 86 +++++++++++---- trans/javascript.str | 12 ++ trans/python.str | 1 + trans/statics.stx | 247 +++++++++++++++++++++++++++++++++++++++++- 16 files changed, 386 insertions(+), 34 deletions(-) create mode 120000 signature/TYPE.str create mode 100644 signature/TYPE.stx create mode 100644 statics/util.stx create mode 100644 tests/hw.fos diff --git a/README.md b/README.md index 91e1605..0df4cba 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ language as possible to work in, given that I inevitably will be doing a bunch of coding. The language will be centrally organized around the concept of "streams" (somewhat in the spirit of [streem](https://github.com/matz/streem) and/or -[Orc](http://orc.csres.utexas.edu/index.shtml)). In fact all higher-type +[Orc](http://orc.csres.utexas.edu/index.shtml), or to a lesser extent, +[Sisal-is](https://github.com/parsifal-47/sisal-is)). In fact all higher-type entities will be cast in terms of streams, or in slogan form, "++f++unctions and (binary) ++o++perators are ++str++eams" (hence the name "fostr"). 
diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 79b2ca8..7cf131a 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -27,9 +27,9 @@ for path in TEST_LIST: if pfm: continue # skip examples that don't parse ntfm = re.search(r'\n\s*\]\].*?don.t.test', details) if ntfm: continue # explicit skip - em = re.search(r'\n\s*\]\]', details) + em = re.search(r'\n\]\]', details) if not em: continue - example = details[:em.start()+1] + example = details[:em.start()+1].replace('[[','').replace(']]','') expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) diff --git a/editor/Analysis.esv b/editor/Analysis.esv index 0667f1e..eb3197f 100644 --- a/editor/Analysis.esv +++ b/editor/Analysis.esv @@ -22,3 +22,4 @@ menus action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source) action: "Show analyzed AST" = debug-show-analyzed + action: "Show analyzed type" = debug-show-type diff --git a/mkdocs.yml b/mkdocs.yml index 6726b34..e316a98 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,6 +2,7 @@ site_name: fostr language nav: - README.md - tests/basic.md +- trans/statics.md - implementation.md plugins: diff --git a/signature/TYPE.str b/signature/TYPE.str new file mode 120000 index 0000000..332d8ef --- /dev/null +++ b/signature/TYPE.str @@ -0,0 +1 @@ +TYPE.stx \ No newline at end of file diff --git a/signature/TYPE.stx b/signature/TYPE.stx new file mode 100644 index 0000000..e299f8b --- /dev/null +++ b/signature/TYPE.stx @@ -0,0 +1,7 @@ +module signature/TYPE +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE diff --git a/statics/util.stx b/statics/util.stx new file mode 100644 index 0000000..7ce94e7 --- /dev/null +++ b/statics/util.stx @@ -0,0 +1,7 @@ +module statics/util +imports signature/TYPE + +rules + lastTYPE : list(TYPE) -> TYPE + lastTYPE([T]) = T. + lastTYPE([U | TS]) = lastTYPE(TS). 
diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 4a39131..4dc4569 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -8,6 +8,14 @@ context-free start-symbols Start +lexical sorts + + STRING_LITERAL + +lexical syntax + + STRING_LITERAL = "'"~[\']*"'" + context-free sorts Start LineSeq Line OptTermEx TermExLst TermEx Ex @@ -29,13 +37,14 @@ context-free syntax TermEx.Terminate = <;> - Ex.Int = INT - Ex.Stream = - Ex.Sum = [[Ex] + [Ex]] {left} - Ex.Gets = [[Ex] << [Ex]] {left} - Ex.To = [[Ex] >> [Ex]] {left} + Ex.Int = INT + Ex.LitString = STRING_LITERAL + Ex.Stream = + Ex.Sum = < + > {left} + Ex.Gets = [[Ex] << [Ex]] {left} + Ex.To = [[Ex] >> [Ex]] {left} - Ex = <()> {bracket} + Ex = <()> {bracket} context-free priorities diff --git a/tests/basic.spt b/tests/basic.spt index e3f4f69..e1cadf9 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,6 +1,16 @@ module basic language fostr +test hw1_type [[ +[[stream]] << [['Hello, world! ']] << [[3+2]] << ' times.' +]] +run get-type on #1 to STREAM() +run get-type on #2 to STRING() +run get-type on #3 to INT() +run get-type to STREAM() +/** writes +Hello, world! 5 times.**/ + /** md Title: A whirlwind tour of fostr diff --git a/tests/emit_thrice.fos b/tests/emit_thrice.fos index 6042aad..232e2be 100644 --- a/tests/emit_thrice.fos +++ b/tests/emit_thrice.fos @@ -1,4 +1,4 @@ - stream << 72 + 87 + stream << 'Some numbers: ' stream << 88 + 96 99 + 12 >> diff --git a/tests/hw.fos b/tests/hw.fos new file mode 100644 index 0000000..90fc0d4 --- /dev/null +++ b/tests/hw.fos @@ -0,0 +1 @@ +stream << 'Hello, world!' 
diff --git a/trans/analysis.str b/trans/analysis.str index 70919cc..e0c22ef 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,5 +1,4 @@ module analysis - imports statixruntime @@ -51,3 +50,18 @@ rules // Debugging debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result) with filename := path ; result := sel + + // Extract the type assigned to a node by Statix + get-type: node -> type + where + // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error) + a := node]])>; + // Gets the type of the given node (or throws an error) + type := node]])> node + + fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail + + // Prints the analyzed type of a selection. + debug-show-type: (sel, _, _, path, projp) -> (filename, result) + with filename := path + ; result := sel diff --git a/trans/haskell.str b/trans/haskell.str index eae335e..560b29c 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,15 +1,25 @@ module haskell -imports libstrategolib signatures/- util +imports libstrategolib signatures/- signature/TYPE util analysis rules - /* Approach: Generate code from the bottom up. - At every node, we create a pair of the implementation and - necessary preamble of IO actions. - We concatenate preambles as we go up. - Finally, at the toplevel we emit the preamble before returning the - final value. + /* Approach: + A) We will define a local transformation taking a term with value strings + at each child to a value string for the node. + B) We will append IO actions needed to set up for the value progressively + to a Preactions rule (mapping () to the list of actions). There will + be a utility `add-preaction` to append a new clause to value of this + rule. + C) We will use bottomup-para to traverse the full AST with the + transformation from A so that we have access to the original expression + (and get get the Statix-associated type when we need to). 
+ Hence the transformation in (A) must actually take a pair of + an (original) term and a term with value strings at each child, + and be certain to return a value string. + + Finally, at the toplevel we emit the result of () before + returning the final value. */ - hs: TopLevel((c,p)) -> $[import System.IO + hs: (_, TopLevel(val)) -> $[import System.IO data IOStream = StdIO gets :: Show b => a -> b -> IO a @@ -17,27 +27,61 @@ rules putStr(show d) return s + getsStr :: a -> String -> IO a + getsStr s d = do + putStr(d) + return s + main = do - [p]return [c]] + [()]return [val]] - hs: Stream() -> ("StdIO", "") - hs: Int(x) -> (x, "") - hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) + hs: (_, Stream()) -> "StdIO" + hs: (_, Int(x)) -> x + hs: (_, LitString(x)) -> x + hs: (_, Sum(x, y)) -> $[([x] + [y])] - hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") - hsget: (s, x, p, v) -> (v, [p, $[[v] <- [s] `gets` [x]], - "\n"]) + hs: (Gets(_, xn), Gets(s, x)) -> v + with v := "_fostr_get" + ; [$[[v] <- [(s, xn, x)]]] + hs: (To(xn, _), To(x, s)) -> v + with v := "_fostr_to" + ; [$[let [v] = [x]], (s, xn, v)] - hs: To( (c, p), (d, q)) -> (c,d,(p,q),"fosto") - hsto: (x, s, p, v) -> (v, [p, $[let [v] = [x]], "\n", - $[[s] `gets` [v]], "\n"]) + hs_gets: (s, xn, x ) -> $[[s] [xn] [x]] + hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`") - hs: Terminate((c,p)) -> ($[[c];;], p) - hs: Sequence(l) -> (l, l) + hs: (_, Terminate(x)) -> $[[x];;] + hs: (_, Sequence(l)) -> l + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + hs: (_, []) -> [] + hs: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + hs: (x, x) -> x where x + + /* Characters we need to escape in Haskell string constants */ + Hascape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... 
*/ + Hascape: [ 0 | cs ] -> ['\', '0' | cs ] + Hascape: [ 7 | cs ] -> ['\', 'a' | cs ] // Alert + Hascape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Hascape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies + haskLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Hascape)) + ; double-quote - haskell = bottomup(try(hs)) + haskell = rules(Preactions: () -> ""); bottomup-para(try(hs)) + + /* See "Approach" at top of file */ + add-preactions = newp := ((), ) + ; rules(Preactions: () -> newp) // Interface haskell code generation with editor services and file system to-haskell: (selected, _, _, path, project-path) -> (filename, result) diff --git a/trans/javascript.str b/trans/javascript.str index 605b46a..645cc63 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -13,13 +13,25 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x + js: LitString(x) -> x js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] js: Terminate(x) -> x js: Sequence(l) -> l + /* Characters we need to escape in Javascript string constants */ + Jscape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... 
*/ + Jscape: [ 0 | cs ] -> ['\', '0' | cs ] + Jscape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Jscape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + javaLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Jscape)) + ; single-quote javascript = bottomup(try(js)) diff --git a/trans/python.str b/trans/python.str index 238c006..7aa4e06 100644 --- a/trans/python.str +++ b/trans/python.str @@ -15,6 +15,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x + py: LitString(x) -> $[r[x]] py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] diff --git a/trans/statics.stx b/trans/statics.stx index 7772698..ef63c8d 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -1,14 +1,257 @@ module statics imports signatures/fostr-sig +imports signature/TYPE +imports statics/util -// see docs/implementation.md for details on how to switch to multi-file analysis +/** md +Title: Adding Program Analysis with Statix + +## Development of fostr static analysis + +This section is more documentation of Spoofax in general and Statix +in particular than of fostr itself, but is being maintained here in case +it could be either helpful to someone getting started with Statix or +helpful in understanding how the static characteristics of fostr were designed. + +As mentioned in the [Overview](../README.md), I don't like to program and a +corollary of that is never to use a facility unless/until there's a need for +it. So the first few rudimentary passes at fostr simply declared every program +to be "OK" from the point of view of Statix: +```statix +{! "\git docs/statix_start:trans/statics.stx" extract: + start: programOk + stop: (.*TopLevel.*) +!} +``` + +Then I reached the point at which the grammar was basically just +```SDF3 +// Start.TopLevel = +// Seq = +// Seq.Sequence = sq:Ex+ {layout(align-list sq)} +// Ex.Terminated = <;> +{! 
"\git docs/statix_start:syntax/fostr.sdf3" extract: + start: TermEx.Terminate + stop: (.*bracket.*) +!} +``` +(The first four clauses are in comments because they approximate fostr's +grammar; it actually uses a few more sorts for sequences of +expressions, to achieve fostr's exact layout rules. Also note that the parsing +of literal strings later evolved to include the surrounding single quotes, +because the rule above implicitly allows layout between the quotes and the +string contents, creating ambiguity.) + +This was the first point at which there were two different types that might +need to be written to standard output (Int and String), and although of course +the dynamically-typed Python and Javascript code generated dealt with both fine, +the Haskell code needed to differ depending on the +type of the item written (and I hadn't even started OCaml code generation at +that point since I knew it would be hopeless without statically typing fostr +programs). + +So it was time to bite the bullet and add type checking via Statix to fostr. +The first step was to replace the simple assertion that any TopLevel +is OK with a constraint that its Seq must type properly, and an assignment of +that type to the top level node: +```statix +programOk(tl@TopLevel(seq)) :- {T} + type_Seq(seq) == T, + @tl.type := T. +``` +Of course, for this to even parse, we must have a definition of `type_Seq`: +```statix +{! ../signature/TYPE.stx extract: {start: module, stop: rules} !} +**/ + +// see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point programOk : Start - programOk(TopLevel(_)). + /** md +rules + type_Seq : Seq -> TYPE +``` + **/ + + type_LineSeq : LineSeq -> TYPE + + programOk(tl@TopLevel(seq)) :- {T} + type_LineSeq(seq) == T, + @tl.type := T. + + /** md +Now to type a Seq, we look to the syntax, and see that there are two +possibilities for what it might be: just an Ex, or a Sequence(_) of a +list of 'Ex's. 
For the first, Statix does not allow one sort to simply +"become" another, but the Spoofax infrastructure automatically inserts +"injection" constructors for us, in this case one named Ex2Seq. So the +first rule for `type_Seq` is straightforward: + +```statix + type_Seq(s@Ex2Seq(e)) = T :- + type_Ex(e) == T, + @s.type := T. +``` +where of course type_Ex needs its own declaration analogous to the above. + **/ + + type_Line : Line -> TYPE + + type_LineSeq(ls@Line2LineSeq(l)) = T :- + type_Line(l) == T, + @ls.type := T. + + /** md + +The other (and in fact more typical) rule for `type_Seq`, when it actually +consists of a sequence of expressions, is a bit more involved. Fortunately +Statix provides a primitive for mapping over a list, so we can proceed as +follows: +```statix + types_Exs maps type_Ex(list(*)) = list(*) + type_Seq(s@Sequence(l)) = T :- {lt} + types_Exs(l) == lt, + lastTYPE(lt) == T, + @s.type := T. +``` +Here `lastTYPE` is a function that extracts the last TYPE from a list. +Unless/until Statix develops some sort of standard library, it must be +hand-defined, as done in "statics/util.stx" like so: +```statix +{! ../statics/util.stx extract: {start: lastTYPE} !} +``` + **/ + + types_Lines maps type_Line(list(*)) = list(*) + + type_LineSeq(ls@Sequence(l)) = T :- {lt} + types_Lines(l) == lt, + lastTYPE(lt) == T, + @ls.type := T. + + type_OptTermEx : OptTermEx -> TYPE + + type_Line(l@OptTermEx2Line(ote)) = T :- + type_OptTermEx(ote) == T, + @l.type := T. + + type_Ex : Ex -> TYPE + type_TermEx : TermEx -> TYPE + + type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- + type_Ex(e) == T, + @ote.type := T. + + type_OptTermEx(ote@TermEx2OptTermEx(te)) = T :- + type_TermEx(te) == T, + @ote.type := T. + + /** md + +This brings us to the syntax rules for the basic expressions themselves, +which comprise almost all of the remaining fostr language constructs. 
+But first a mechanism suggested by Ivo Wilms to avoid repeating the node +type annotation in every rule: +```statix + **/ + + /** md */ + ty_Ex : Ex -> TYPE + + type_Ex(e) = ty@ty_Ex(e) :- + @e.type := ty. + /* **/ + + /** md +``` +At this stage in fostr's development, there was no difference between a +terminated and unterminated expression, so the typing rule for that +constructor was trivial: +```statix + ty_Ex(Terminated(e)) = ty_Ex(e). +``` + **/ + + type_TermEx(te@Terminate(e)) = T :- + type_Ex(e) == T, + @te.type := T. + + /** md + +Now typing literals is straightforward: +```statix + **/ + + /** md */ + ty_Ex(Int(_)) = INT(). + ty_Ex(LitString(_)) = STRING(). + ty_Ex(e@Stream()) = STREAM(). + /* **/ + + /** md +``` + +Finally we get to the binary operators, and here we use the pattern found in +recent versions of the +"[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)" +example language and in the Fall 2020 TU-Delft class lecture on +[Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/). +This pattern lets us specify error messages. + +```statix + **/ + + /** md */ + ty_Ex(Sum(e1, e2)) = INT() :- + type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1, + type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2. + + ty_Ex(Gets(e1, e2)) = STREAM() :- {T} + type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1, + type_Ex(e2) == T. + + ty_Ex(To(e1, e2)) = T :- + type_Ex(e1) == T, + type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. + /* **/ + + /** md +``` + +### Using type annotations in transformation + +At this point, Statix properly types all of the valid programs of the very +rudimentary language defined by the grammar above. But the proximate purpose +for implementing this typing was to aid Haskell code generation. So how +do we actually use the assigned types in a Stratego transformation? 
+ +Statix provides a Stratego api that includes, among other items, strategies +`stx-get-ast-analysis` and `stx-get-ast-type(|analysis)` that provide access +to the assigned types. However, it's easiest to use the information via +a wrapper like this, essentially lifted from the "chicago" language project: +```stratego +{! analysis.str extract: + start: Extract.the.type +terminate: Prints.the.analyzed.type +!} +``` + +Now `get-type` run on a node of the analyzed AST produces the assigned `TYPE` +(as an ATerm in the constructors of sort TYPE in Statix). + +Thus, you can select on the assigned type, as in the strategy to select +the correct Haskell operator to use to send an item to standard output: +```stratego +{! haskell.str extract: + start: '(.*hs_getOp.=.*)' + stop: \s +!} +``` + **/ rules // multi-file entry point From 7d4d3b93c931d66d794c20ccd85ae55df550fb87 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Thu, 18 Feb 2021 12:18:47 -0800 Subject: [PATCH 08/12] feat: add <<< and >>> abbreviating operations to default stream Resolves #16. 
--- syntax/fostr.sdf3 | 4 ++++ tests/basic.spt | 12 ++++++++++++ tests/emit_several.fos | 12 ++++++------ tests/hw.fos | 2 +- trans/desugar.str | 6 ++++-- trans/haskell.str | 3 ++- trans/javascript.str | 5 ++++- trans/python.str | 5 ++++- 8 files changed, 37 insertions(+), 12 deletions(-) diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 4dc4569..c346eee 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -42,14 +42,18 @@ context-free syntax Ex.Stream = Ex.Sum = < + > {left} Ex.Gets = [[Ex] << [Ex]] {left} + Ex.DefGets = [<<< [Ex]] Ex.To = [[Ex] >> [Ex]] {left} + Ex.DefTo = [[Ex] >>>] Ex = <()> {bracket} context-free priorities Ex.To + > Ex.DefTo > Ex.Sum + > Ex.DefGets > Ex.Gets, // prevent cycle: no singletons diff --git a/tests/basic.spt b/tests/basic.spt index e1cadf9..494cafb 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -231,3 +231,15 @@ run desugar-fostr to TopLevel(Sequence([ Terminate(Sum(Int("11"), To(Int("12"), Stream()))), To(To(Int("13"), Stream()), Stream()) ])) + +test emit_several_default [[ +<<< 1 + 2; 3 >>> +(4 + 5) >>> >> stream; stream << 6; +<<< 7 << 75 +<<< 8 + + (9+10); +11 + 12 >>>; 13 >>> + >>> +]] parse succeeds +/** writes +3399677527121313*/ diff --git a/tests/emit_several.fos b/tests/emit_several.fos index c806fd3..e7cbac5 100644 --- a/tests/emit_several.fos +++ b/tests/emit_several.fos @@ -1,7 +1,7 @@ -stream << 1 + 2; 3 >> stream -(4 + 5) >> stream; stream << 6; -stream << 7 -stream << 8 +<<< 1 + 2; 3 >>> +(4 + 5) >>> >> stream; stream << 6; +<<< 7 << 75 +<<< 8 + (9+10); -11 + 12 >> stream; 13 >> stream - >> stream +11 + 12 >>>; 13 >>> + >>> diff --git a/tests/hw.fos b/tests/hw.fos index 90fc0d4..19eade4 100644 --- a/tests/hw.fos +++ b/tests/hw.fos @@ -1 +1 @@ -stream << 'Hello, world!' +<<< 'Hello, world!' 
diff --git a/trans/desugar.str b/trans/desugar.str index bfb4ecf..7276f38 100644 --- a/trans/desugar.str +++ b/trans/desugar.str @@ -15,7 +15,9 @@ rules seqFlatten: Sequence(l) -> Sequence() <+ ![])>l) + defStream: DefGets(x) -> Gets(Stream(), x) + defStream: DefTo(x) -> To(x, Stream()) + strategies - desugar-fostr = bottomup(try(deISe <+ seqFlatten)) - \ No newline at end of file + desugar-fostr = bottomup(try(defStream <+ deISe <+ seqFlatten)) diff --git a/trans/haskell.str b/trans/haskell.str index 560b29c..fc75a7b 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -19,7 +19,8 @@ rules returning the final value. */ - hs: (_, TopLevel(val)) -> $[import System.IO + hs: (_, TopLevel(val)) -> $[-- Preamble from fostr + import System.IO data IOStream = StdIO gets :: Show b => a -> b -> IO a diff --git a/trans/javascript.str b/trans/javascript.str index 645cc63..1fd2900 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -2,13 +2,16 @@ module javascript imports libstrategolib signatures/- util rules - js: TopLevel(x) -> $[const Stdio = { + js: TopLevel(x) -> $[// Fostr preamble + const Stdio = { gets: v => { process.stdout.write(String(v)); return Stdio; }, } function to(data, strm) { strm.gets(data); return data; } + // End of preamble + [x]] js: Stream() -> $[Stdio] diff --git a/trans/python.str b/trans/python.str index 7aa4e06..068f05a 100644 --- a/trans/python.str +++ b/trans/python.str @@ -2,7 +2,8 @@ module python imports libstrategolib signatures/- util rules - py: TopLevel(x) -> $[import sys + py: TopLevel(x) -> $[## Fostr preamble + import sys class StdioC: def gets(self, v): print(v, file=sys.stdout, end='') @@ -11,6 +12,8 @@ rules strm.gets(data) return data Stdio = StdioC() + ## End of preamble + [x]] py: Stream() -> $[Stdio] From d2ba26a53e5b96c0f6d313db9bfc9e2469968087 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Thu, 18 Feb 2021 19:41:54 -0800 Subject: [PATCH 09/12] feat: Add double-quoted string constants with escapes Resolves 
#20. --- syntax/fostr.sdf3 | 1 + tests/hw2.fos | 1 + trans/haskell.str | 1 + trans/javascript.str | 1 + trans/python.str | 1 + trans/statics.stx | 9 ++++++--- 6 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 tests/hw2.fos diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index c346eee..8624b58 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -39,6 +39,7 @@ context-free syntax Ex.Int = INT Ex.LitString = STRING_LITERAL + Ex.EscString = STRING Ex.Stream = Ex.Sum = < + > {left} Ex.Gets = [[Ex] << [Ex]] {left} diff --git a/tests/hw2.fos b/tests/hw2.fos new file mode 100644 index 0000000..c177d31 --- /dev/null +++ b/tests/hw2.fos @@ -0,0 +1 @@ +<<< "Hello,\t\tworld!\n\n" diff --git a/trans/haskell.str b/trans/haskell.str index fc75a7b..0e26050 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -39,6 +39,7 @@ rules hs: (_, Stream()) -> "StdIO" hs: (_, Int(x)) -> x hs: (_, LitString(x)) -> x + hs: (_, EscString(x)) -> x hs: (_, Sum(x, y)) -> $[([x] + [y])] hs: (Gets(_, xn), Gets(s, x)) -> v diff --git a/trans/javascript.str b/trans/javascript.str index 1fd2900..3a847e2 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -17,6 +17,7 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x js: LitString(x) -> x + js: EscString(x) -> x js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] diff --git a/trans/python.str b/trans/python.str index 068f05a..6449417 100644 --- a/trans/python.str +++ b/trans/python.str @@ -19,6 +19,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x py: LitString(x) -> $[r[x]] + py: EscString(x) -> x py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] diff --git a/trans/statics.stx b/trans/statics.stx index ef63c8d..340a1df 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -184,16 +184,19 @@ constructor was trivial: Now typing literals is straightforward: ```statix +{! 
"\git docs/statix_works:trans/statics.stx" extract: + start: '(.*ty_Ex.Int.*\s*)' + stop: '/. ../' +!} +``` **/ - /** md */ ty_Ex(Int(_)) = INT(). ty_Ex(LitString(_)) = STRING(). + ty_Ex(EscString(_)) = STRING(). ty_Ex(e@Stream()) = STREAM(). - /* **/ /** md -``` Finally we get to the binary operators, and here we use the pattern found in recent versions of the From f9c6e04c8ccb1b2dfc5d766755701d518597ae3a Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Fri, 19 Feb 2021 08:08:24 -0800 Subject: [PATCH 10/12] docs: Finally get the tour to start from the real helloworld Also improves the testing situation for the features to date. Resolves #17. --- tests/basic.spt | 151 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 101 insertions(+), 50 deletions(-) diff --git a/tests/basic.spt b/tests/basic.spt index 494cafb..215c83f 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -16,15 +16,21 @@ Title: A whirlwind tour of fostr ## Whirlwind tour -fostr is just in its infancy, so it's not yet even ready for -Hello, World. The best we can offer now is this little snippet -that writes the sum of the ASCII codes for 'H', 'W', and '!' to standard output: +There seems only to be one way to start a tour like this. So here goes: ```fostr **/ -/** md */ test emit_sum [[ -stream << 72 + 87 + 33 +/** md */ test hello_world [[ +<<< 'Hello, world!' ]] /* **/ +parse to TopLevel(DefGets(LitString("'Hello, world!'"))) +/** writes +Hello, world!**/ + +// Prior proto-hello-world, no longer in the tour. +test emit_sum [[ +stream << 72 + 87 + 33 +]] parse to TopLevel(Gets(Stream(), Sum(Sum(Int("72"), Int("87")), Int("33")))) /** writes 192**/ @@ -33,7 +39,7 @@ parse to TopLevel(Gets(Stream(), Sum(Sum(Int("72"), Int("87")), Int("33")))) ``` At the moment, there are only two ways to run a file containing fostr code -(you can find the above in `tests/emit_sum.fos`). They both start by +(you can find the above in `tests/hw.fos`). They both start by cloning this fostr project. 
Then, either: 1. Open the project in Eclipse and build it, visit your program file, @@ -46,30 +52,70 @@ cloning this fostr project. Then, either: For example, this snippet generates the following Python: ```python -{! ../tests/emit_sum.py extract: +{! ../tests/hw.py extract: start: 'Stdio\s=' !} ``` -(which writes "192" to standard output); it also generates identical code in -this simple example for -Javascript, although it generates a different preamble defining Stdio in each -case. (Haskell code generation is also currently supported.) +It generates nearly identical code in +this simple example for Javascript (just with `"Hello, world!"` +in place of `r'Hello, world!'`), although it generates a different +preamble defining Stdio for each language. (Currently, Haskell code +generation is also supported.) + +There's not much to break down in such a tiny program as this, but let's do +it. The prefix operator `<<<` could be read as "the default stream receives...", +and unsurprisingly in a main program the default stream is standard input and +output. And `'Hello, world!'` is a literal string constant; what you see is +what you get. The only detail to know is that such constants must occur +within a single line of your source file. So depending on how you +ran the program and how closely you looked at its output, +you may have noticed this program does not write a newline at the end +of its message. Nothing is ever implicitly sent to a stream. So if you want +newlines, you should switch to a (double-quoted) string that allows +the usual array of escape sequences: + +```fostr +**/ + +/** md */ test hello_esc_world [[ +<<< "Hello,\t\tworld!\n\n" +]] /* **/ +parse to TopLevel(DefGets(EscString("\"Hello,\t\tworld!\n\n\""))) +/** writes +Hello, world! + +**/ + +/** md +``` +(We threw in two of each so you could clearly see them in the output if +you run this program.) 
### Everything has a value As mentioned in the [Introduction](../README.md), everything in a fostr program (including the entire program itself) is an expression and has -a value. So what's the value of that expression above? Well, appropriately -enough, `stream` is our -first example of a stream, and for convenience, the value of a stream -receiving an item is (usually) just the stream back again. The `<<` operator -is also left-associative, so that way we can chain insertions into a stream: +a value. So what's the value of that expression above? Well, for convenience, +the value of a stream receiving an item is (generally) just the stream back +again. That way we can use the general (left-associative) +`_stream_ << _value_` operator to chain insertions into a stream: ```fostr **/ -/** md */ test emit_twice [[ -stream << 72 + 87 + 33 << 291 +/** md */ test state_obvious [[ +<<< 'Two and ' << 2 << ' make ' << 2+2 << ".\n" ]] /* **/ +parse to TopLevel( + Gets(Gets(Gets(Gets(DefGets(LitString("'Two and '")),Int("2")), + LitString("' make '")),Sum(Int("2"),Int("2"))), + EscString("\"./n\""))) +/** writes +Two and 2 make 4. +**/ + +test emit_twice [[ +stream << 72 + 87 + 33 << 291 +]] parse to TopLevel( Gets(Gets(Stream(), Sum(Sum(Int("72"), Int("87")), Int("33"))), Int("291"))) /** writes @@ -77,24 +123,28 @@ parse to TopLevel( /** md ``` -Running this program produces a nice palindromic output: "192291". 
And because sometimes you want to emphasize the value and propagate that instead of the stream, you can also write these expressions "the other way" -with `>>`; both forms return the first argument, so the following writes "824": +with `>>>` for sending to the default stream or `>>` in general; these forms +(generally) return the value sent, so the following writes "824": ```fostr **/ /** md */ test enters_twice [[ -(7 + 8 >> stream + 9) >> stream +(7 + 8 >> stream + 9) >>> ]] /* **/ parse to TopLevel( - To(Sum(Sum(Int("7"), To(Int("8"), Stream())), Int("9")), Stream())) + DefTo(Sum(Sum(Int("7"), To(Int("8"), Stream())), Int("9")))) /** writes 824**/ /** md ``` +Two things are worth noting here: the default stream can always be referred to +directly via the identifier `stream`, and the precedences of `<<` and `>>` are +different so that generally full expressions go to a stream with `<<` but +just individual terms are sent with `>>`. ### Layout in fostr @@ -104,13 +154,13 @@ lines are indented from the start of the initial line: **/ /** md */ test receive_enter_break [[ -stream << +<<< 7 - + 8 >> stream + + 8 >>> + 9 ]] /* **/ parse to TopLevel( - Gets(Stream(), Sum(Sum(Int("7"), To(Int("8"), Stream())), Int("9")))) + DefGets(Sum(Sum(Int("7"), DefTo(Int("8"))), Int("9")))) /** writes 824**/ @@ -121,8 +171,8 @@ parse to TopLevel( **/ /** md */ test enter_receive_bad_continuation [[ -(7 + 8 >> stream + 9) ->> (stream << 9 + 2) +(7 + 8 >>> + 9) +>> (<<< 9 + 2) ]] /* **/ parse fails @@ -145,16 +195,17 @@ lines are evaluated in sequence. 
For example, the program **/ /** md */ test emit_thrice [[ - stream << 72 + 87 - stream << 88 + <<< 72 + 87 + <<< 88 + 96 - 99 + 12 >> - stream + 99 + 12 + >>> + ]] /* **/ parse to TopLevel(Sequence([ - Gets(Stream(), Sum(Int("72"), Int("87"))), - Gets(Stream(), Sum(Int("88"), Int("96"))), - Sum(Int("99"), To(Int("12"), Stream())) + DefGets(Sum(Int("72"), Int("87"))), + DefGets(Sum(Int("88"), Int("96"))), + Sum(Int("99"), DefTo(Int("12"))) ])) /** writes 15918412**/ @@ -169,10 +220,10 @@ in sequence align at the left; e.g., the following fails to parse: **/ /** md */ test emit_thrice_bad_alignment [[ - stream << 72 + 87 -stream << 88 + <<< 72 + 87 +<<< 88 + 96 - 99 + 12 >> stream + 99 + 12 >>> ]] /* **/ parse fails @@ -187,23 +238,23 @@ are so terminated. So the following is OK: **/ /** md */ test emit_several [[ - stream << 1 + 2; 3 >> stream - (4 + 5) >> stream; stream << 6; - stream << 7 - stream << 8 + <<< 1 + 2; 3 >>> + (4 + 5) >>>; stream << 6; + <<< 7 + <<< 8 + (9+10); - 11 + 12 >> stream; 13 >> stream - >> stream + 11 + 12 >>>; 13 >>> + >>> ]] /* **/ parse to TopLevel(Sequence([ - ISequence(Prior([Terminate(Gets(Stream(), Sum(Int("1"), Int("2"))))]), - To(Int("3"), Stream())), - ISequence(Prior([Terminate(To(Sum(Int("4"), Int("5")), Stream()))]), + ISequence(Prior([Terminate(DefGets(Sum(Int("1"), Int("2"))))]), + DefTo(Int("3"))), + ISequence(Prior([Terminate(DefTo(Sum(Int("4"), Int("5"))))]), Terminate(Gets(Stream(), Int("6")))), - Gets(Stream(), Int("7")), - Terminate(Gets(Stream(), Sum(Int("8"), Sum(Int("9"), Int("10"))))), - ISequence(Prior([Terminate(Sum(Int("11"), To(Int("12"), Stream())))]), - To(To(Int("13"), Stream()), Stream())) + DefGets(Int("7")), + Terminate(DefGets(Sum(Int("8"), Sum(Int("9"), Int("10"))))), + ISequence(Prior([Terminate(Sum(Int("11"), DefTo(Int("12"))))]), + DefTo(DefTo(Int("13")))) ])) /** writes 3396727121313**/ From cc89ad1e93a0e5da1080e30e0c45af53a850ffd0 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Mon, 1 Mar 2021 
20:40:35 +0000 Subject: [PATCH 11/12] Add OCaml code generation (#24) Also start using nailgun to speed up code generation. Resolves #6. Co-authored-by: Glen Whitney Reviewed-on: https://code.studioinfinity.org/glen/fostr/pulls/24 Co-Authored-By: Glen Whitney Co-Committed-By: Glen Whitney --- .drone.yml | 23 ++++++++++++++-- .gitignore | 6 +++++ bin/extract_tests.xsh | 1 + bin/fosgen | 20 +++++++++++++- bin/generate_test_code | 2 +- bin/let_sun_shine | 40 +++++++++++++++++++++++++++ editor/Generation.esv | 1 + tests/basic.spt | 6 ++--- trans/fostr.str | 1 + trans/haskell.str | 2 +- trans/ocaml.str | 61 ++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 155 insertions(+), 8 deletions(-) create mode 100755 bin/let_sun_shine create mode 100644 trans/ocaml.str diff --git a/.drone.yml b/.drone.yml index 452f163..90b928b 100644 --- a/.drone.yml +++ b/.drone.yml @@ -29,7 +29,18 @@ steps: - java -jar /drone/lib/spt/org.metaborg.spt.cmd/target/org.metaborg.spt.cmd* -l . -s /drone/lib/spt/org.metaborg.meta.lang.spt -t tests - mkdir -p lib - curl -o lib/sunshine.jar -L 'http://artifacts.metaborg.org/service/local/artifact/maven/redirect?r=snapshots&g=org.metaborg&a=org.metaborg.sunshine2&v=LATEST' - - bin/fosgen tests/emit_sum.fos + - name: setup_gen + image: gcc + volumes: + - name: m2 + path: /root/.m2 + commands: + - git clone https://github.com/facebook/nailgun.git + - cd nailgun + - make + - cd ../bin + - ln -s ../nailgun/nailgun-client/target/ng . + - cd .. 
- name: extract_tests image: xonsh/xonsh commands: @@ -41,7 +52,8 @@ steps: path: /drone/lib - name: m2 path: /root/.m2 - commands: + commands: # Note we first make sure that fosgen is working + - bin/fosgen -d tests/emit_sum.fos - bin/generate_test_code - name: python_tests image: python:slim @@ -55,6 +67,13 @@ steps: image: haskell commands: - bin/run_tests runghc hs + - name: ocaml_tests + image: ocaml/opam + commands: + - ls -als tests/extracted + - opam init + - eval $(opam env) + - bin/run_tests ocaml ml volumes: - name: lib diff --git a/.gitignore b/.gitignore index 80a4a3c..35e6d95 100644 --- a/.gitignore +++ b/.gitignore @@ -12,10 +12,16 @@ .pydevproject +a.out + *.aterm /site +bin/ng tests/extracted/* tests/*.js tests/*.py tests/*.hs +tests/*.ml +tests/*.cmi +tests/*.cmo adhoc* diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 7cf131a..1d8c59f 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -19,6 +19,7 @@ EXP = 'expect' for path in TEST_LIST: destdir = pf"{DESTINATION}/{path.stem}" mkdir -p @(destdir) + chmod ugo+rwx @(destdir) contents = path.read_text() tests = re.split(r'test\s*(.+?)\s*\[\[.*?\n', contents)[1:] testit = iter(tests) diff --git a/bin/fosgen b/bin/fosgen index c5af48b..b03656b 100755 --- a/bin/fosgen +++ b/bin/fosgen @@ -5,6 +5,7 @@ erro() { printf "%s\n" "$*" >&2; } ##### Set defaults: SUPPRESS_ERR=YES +USE_NAILGUN=YES LANGUAGE=Python ##### Extract command line options: @@ -14,18 +15,23 @@ do -h|--help) echo echo "Usage:" - echo " fosgen [-d] [-l LANGUAGE] FILE" + echo " fosgen [-d] [-j] [-l LANGUAGE] FILE" echo echo "Writes to standard output the code generated from the fostr" echo "program in FILE, targeting the specified LANGUAGE (which" echo "defaults to Python)." echo echo "The -d option writes diagnostic output to standard error." + echo "The -j option uses the Spoofax Sunshine JAR directly, rather" + echo "than via nailgun." 
exit ;; -d) SUPPRESS_ERR='' ;; + -j) + USE_NAILGUN='' + ;; -l) shift LANGUAGE="$1" @@ -67,5 +73,17 @@ then exec 2>/dev/null fi +if [[ $USE_NAILGUN ]] +then + if [[ $SUPPRESS_ERR ]] + then + $BINDIR/let_sun_shine + else + $BINDIR/let_sun_shine noisy + fi + $BINDIR/ng sunshine transform -p $PROJDIR -n $LANGUAGE -i $PROGRAM + exit $? +fi + java -jar $SUNJAR transform -p $PROJDIR -l $PROJDIR -l $MVN_REPO -n $LANGUAGE -i $PROGRAM exit $? diff --git a/bin/generate_test_code b/bin/generate_test_code index 2c443d6..0d151a8 100755 --- a/bin/generate_test_code +++ b/bin/generate_test_code @@ -4,7 +4,7 @@ failed=0 for dir in tests/extracted/*; do for file in $dir/*.fos; do - for language in Python Javascript Haskell; do + for language in Python Javascript Haskell OCaml; do echo bin/fosgen -l ${language%.*} $file ... bin/fosgen -l $language $file if [[ $? -ne 0 ]]; then diff --git a/bin/let_sun_shine b/bin/let_sun_shine new file mode 100755 index 0000000..cb3c67a --- /dev/null +++ b/bin/let_sun_shine @@ -0,0 +1,40 @@ +#!/bin/bash + +# Helper for fosgen, not intended to be used directly +# With an argument, print diagnostic output + +BINDIR=$(dirname $BASH_SOURCE) + +if $BINDIR/ng sunshine --help +then + if [[ $1 ]] + then + echo "sun already shining." + fi +else + if [[ $1 ]] + then + echo "disperse the clouds." + fi + SUNJAR="$BINDIR/../lib/sunshine.jar" + PROJDIR="$BINDIR/.." + if [[ ! $MVN_REPO ]]; then + MVN_REPO="$HOME/.m2/repository" + fi + if [[ ! -d $MVN_REPO ]]; then + MVN_REPO="/root/.m2/repository" + fi + if [[ ! -d $MVN_REPO ]]; then + echo "Cannot find your Maven repository. Please set environment variable" + echo "MVN_REPO to its full path and re-run." 
+ exit 1 + fi + if [[ $1 ]] + then + java -jar $SUNJAR server & + else + java -jar $SUNJAR server >/dev/null 2>&1 & + fi + sleep 5 + $BINDIR/ng sunshine load -l $PROJDIR -l $MVN_REPO +fi diff --git a/editor/Generation.esv b/editor/Generation.esv index 3d71fe2..9e5051c 100644 --- a/editor/Generation.esv +++ b/editor/Generation.esv @@ -4,3 +4,4 @@ menus action: "Python" = to-python action: "Javascript" = to-javascript action: "Haskell" = to-haskell + action: "OCaml" = to-ocaml diff --git a/tests/basic.spt b/tests/basic.spt index 215c83f..a5e98c0 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -59,8 +59,8 @@ For example, this snippet generates the following Python: It generates nearly identical code in this simple example for Javascript (just with `"Hello, world!"` in place of `r'Hello, world!'`), although it generates a different -preamble defining Stdio for each language. (Currently, Haskell code -generation is also supported.) +preamble defining Stdio for each language. (Currently, Haskell and OCaml +code generation are also supported.) There's not much to break down in such a tiny program as this, but let's do it. The prefix operator `<<<` could be read as "the default stream receives...", @@ -108,7 +108,7 @@ again. That way we can use the general (left-associative) parse to TopLevel( Gets(Gets(Gets(Gets(DefGets(LitString("'Two and '")),Int("2")), LitString("' make '")),Sum(Int("2"),Int("2"))), - EscString("\"./n\""))) + EscString("\".\n\""))) /** writes Two and 2 make 4. **/ diff --git a/trans/fostr.str b/trans/fostr.str index d9a585a..91194d6 100644 --- a/trans/fostr.str +++ b/trans/fostr.str @@ -6,6 +6,7 @@ imports pp outline analysis + ocaml haskell javascript python diff --git a/trans/haskell.str b/trans/haskell.str index 0e26050..0b08716 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -10,7 +10,7 @@ rules rule. 
C) We will use bottomup-para to traverse the full AST with the transformation from A so that we have access to the original expression - (and get get the Statix-associated type when we need to). + (and can get the Statix-associated type when we need to). Hence the transformation in (A) must actually take a pair of an (original) term and a term with value strings at each child, and be certain to return a value string. diff --git a/trans/ocaml.str b/trans/ocaml.str new file mode 100644 index 0000000..11fcdca --- /dev/null +++ b/trans/ocaml.str @@ -0,0 +1,61 @@ +module ocaml +imports libstrategolib signatures/- util signature/TYPE analysis + +/* Note will use bottomup-para to traverse the full AST so that + we have access to the original expression (and can get the + Statix-associated type when we need to). + + This means that every one of our local rules must take a pair + of an original term and a term with every child replaced by + its generated code. + */ + +rules + ml: (_, TopLevel(x)) -> $[(* fostr preamble *) + type stream = { getS: string -> stream } + let rec stdio = { + getS = (fun s -> print_string s; stdio) + };; + (* End of preamble *) + + [x]] + + ml: (_, Stream()) -> $[stdio] + ml: (_, Int(x)) -> x + ml: (_, LitString(x)) -> $[{|[x]|}] + ml: (_, EscString(x)) -> x + ml: (_, Sum(x,y)) -> $[[x] + [y]] + ml: (Gets(_,yn), Gets(x, y)) + -> $[([x]).getS ([(yn,y)])] + ml: (To(xn,_), To(x, y)) + -> $[let _fto = ([x]) in (ignore (([y]).getS ([(xn,"_fto")])); _fto)] + ml: (_, Terminate(x)) -> x + ml: (_, Sequence(l)) -> l + + ml_seq: [x] -> x + ml_seq: [x | xs ] -> $[ignore ([x]); +[xs]] + + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + ml: (_, []) -> [] + ml: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + ml: (x, x) -> x where x + + ml_str: (node, code) -> $[[node]([code])] + +strategies + + ml_string_cast = get-type; (?INT() < 
!"string_of_int" + !"") + + ocaml = bottomup-para(try(ml)) + + // Interface ocaml code generation with editor services and file system + to-ocaml: (selected, _, _, path, project-path) -> (filename, result) + with filename := path + ; result := selected From 7feddbcfbedce0ed2623ccc6f7b338c41bdf78e7 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Sat, 13 Mar 2021 19:30:23 +0000 Subject: [PATCH 12/12] Extraction from streams (#25) Add the ! postfix operator and !! expression. Also add the ++ string concatenation operator. Also allow specification of standard input in the test scheme. Resolves #7, #18. Co-authored-by: Glen Whitney Reviewed-on: https://code.studioinfinity.org/glen/fostr/pulls/25 Co-Authored-By: Glen Whitney Co-Committed-By: Glen Whitney --- bin/extract_tests.xsh | 8 ++++++++ bin/run_tests | 10 ++++++++-- mkdocs.yml | 2 +- syntax/fostr.sdf3 | 5 ++++- tests/basic.spt | 36 +++++++++++++++++++++++++++++++++++- trans/desugar.str | 1 + trans/haskell.str | 10 ++++++++++ trans/javascript.str | 23 ++++++++++++++++++++--- trans/ocaml.str | 11 ++++++++--- trans/python.str | 4 ++++ trans/statics.stx | 7 +++++++ 11 files changed, 106 insertions(+), 11 deletions(-) diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 1d8c59f..422acb1 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -13,6 +13,9 @@ DESTINATION = 'tests/extracted' # Extension for extracted files: EXT = 'fos' +# Extension for desired input: +INP = 'in' + # Extension for expectations: EXP = 'expect' @@ -34,6 +37,11 @@ for path in TEST_LIST: expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) + im = re.search(r'/\*\*\s+accepts.*?\n([\s\S]*?)\*\*/', details[em.end():]) + if im: + ipath = destdir / f"{name}.{INP}" + ipath.write_text(im[1]) + echo " ...and" @(ipath) xm = re.search(r'/\*\*\s+writes.*?\n([\s\S]*?)\*\*/', details[em.end():]) if xm: xpath = destdir / f"{name}.{EXP}" diff --git a/bin/run_tests b/bin/run_tests index 90ccaa7..da49485 
100755 --- a/bin/run_tests +++ b/bin/run_tests @@ -9,8 +9,14 @@ diffed=0 for dir in tests/extracted/*; do for file in $dir/*.$ext; do ((total++)) - $command $file > $file.out - if [[ $? -ne 0 ]]; then + if [[ -f ${file%.*}.in ]]; then + cat ${file%.*}.in | $command $file > $file.out + result=$? + else + $command $file > $file.out + result=$? + fi + if [[ $result -ne 0 ]]; then echo ERROR: $command $file failed. ((failed++)) else diff --git a/mkdocs.yml b/mkdocs.yml index e316a98..c2d5242 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -9,7 +9,7 @@ plugins: - search - semiliterate: ignore_folders: [target, lib] - exclude_extensions: ['.o', '.hi'] + exclude_extensions: ['.o', '.hi', '.cmi', '.cmo'] extract_standard_markdown: terminate: theme: diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 8624b58..f782466 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -42,10 +42,13 @@ context-free syntax Ex.EscString = STRING Ex.Stream = Ex.Sum = < + > {left} + Ex.Concat = < ++ > {left} Ex.Gets = [[Ex] << [Ex]] {left} Ex.DefGets = [<<< [Ex]] Ex.To = [[Ex] >> [Ex]] {left} Ex.DefTo = [[Ex] >>>] + Ex.Emits = <!> + Ex.DefEmits = Ex = <()> {bracket} @@ -53,7 +56,7 @@ context-free priorities Ex.To > Ex.DefTo - > Ex.Sum + > {Ex.Sum Ex.Concat} > Ex.DefGets > Ex.Gets, diff --git a/tests/basic.spt b/tests/basic.spt index a5e98c0..3d5a4a6 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -293,4 +293,38 @@ test emit_several_default [[ >>> ]] parse succeeds /** writes -3399677527121313*/ +3399677527121313**/ + +/** md +### Streams are bidirectional + +So far we have only sent items to a stream. But we can extract them from +streams as well, with the `!` postfix operator. `!!` all by itself abbreviates +`stream!`, i.e., extraction from the standard stream. For example, + +```fostr +**/ + +/** md */ test custom_hw [[ +<<< "What is your name?\n" +<<< 'Hello, ' ++ !! 
+]] /* **/ +parse to TopLevel(Sequence([ + DefGets(EscString("\"What is your name?\n\"")), + DefGets(Concat(LitString("'Hello, '"),DefEmits())) +])) +/** accepts +Kilroy +**/ +/** writes +What is your name? +Hello, Kilroy +**/ + +/** md +``` + +queries users for their name and then writes a customized greeting. It also +illustrates the use of `++` for string concatenation, as opposed to `+` for +(numerical) addition. +**/ diff --git a/trans/desugar.str b/trans/desugar.str index 7276f38..b22ebd8 100644 --- a/trans/desugar.str +++ b/trans/desugar.str @@ -17,6 +17,7 @@ rules defStream: DefGets(x) -> Gets(Stream(), x) defStream: DefTo(x) -> To(x, Stream()) + defStream: DefEmits() -> Emits(Stream()) strategies diff --git a/trans/haskell.str b/trans/haskell.str index 0b08716..0e0eb15 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -23,6 +23,7 @@ rules import System.IO data IOStream = StdIO + -- Danger: These currently assume the stream is StdIO gets :: Show b => a -> b -> IO a gets s d = do putStr(show d) @@ -33,6 +34,10 @@ rules putStr(d) return s + emit s = do + l <- getLine + return (l ++ "\n") + main = do [()]return [val]] @@ -41,6 +46,7 @@ rules hs: (_, LitString(x)) -> x hs: (_, EscString(x)) -> x hs: (_, Sum(x, y)) -> $[([x] + [y])] + hs: (_, Concat(x, y)) -> $[([x] ++ [y])] hs: (Gets(_, xn), Gets(s, x)) -> v with v := "_fostr_get" @@ -52,6 +58,10 @@ rules hs_gets: (s, xn, x ) -> $[[s] [xn] [x]] hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`") + hs: (_, Emits(s)) -> v + with v := "_fostr_emitted" + ; [$[[v] <- emit [s]]] + hs: (_, Terminate(x)) -> $[[x];;] hs: (_, Sequence(l)) -> l /* One drawback of using paramorphism is we have to handle lists diff --git a/trans/javascript.str b/trans/javascript.str index 3a847e2..9b596ad 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -3,24 +3,41 @@ imports libstrategolib signatures/- util rules js: TopLevel(x) -> $[// Fostr preamble + const _fostr_readline = require('readline'); + const 
_fostr_events = require('events'); + const _fostr_rl = _fostr_readline.createInterface({input: process.stdin}); const Stdio = { - gets: v => { process.stdout.write(String(v)); return Stdio; }, + gets: v => { process.stdout.write(String(v)); return Stdio; }, + emit: async () => { + const [line] = await _fostr_events.once(_fostr_rl, 'line'); + return line + "\n"; } } function to(data, strm) { strm.gets(data); return data; } + + const _fostr_body = async () => { // End of preamble - [x]] + [x] + + // Fostr coda + _fostr_rl.close() + } + _fostr_body(); + ] + with line := "[line]" js: Stream() -> $[Stdio] js: Int(x) -> x js: LitString(x) -> x js: EscString(x) -> x - js: Sum(x,y) -> $[[x] + [y]] + js: Sum(x, y) -> $[[x] + [y]] + js: Concat(x, y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] + js: Emits(x) -> $[(await [x].emit())] js: Terminate(x) -> x js: Sequence(l) -> l diff --git a/trans/ocaml.str b/trans/ocaml.str index 11fcdca..049dbf4 100644 --- a/trans/ocaml.str +++ b/trans/ocaml.str @@ -12,9 +12,10 @@ imports libstrategolib signatures/- util signature/TYPE analysis rules ml: (_, TopLevel(x)) -> $[(* fostr preamble *) - type stream = { getS: string -> stream } + type stream = { getS: string -> stream; emitS: unit -> string } let rec stdio = { - getS = (fun s -> print_string s; stdio) + getS = (fun s -> print_string s; stdio); + emitS = (fun () -> (read_line ()) ^ "\n"); };; (* End of preamble *) @@ -24,11 +25,15 @@ rules ml: (_, Int(x)) -> x ml: (_, LitString(x)) -> $[{|[x]|}] ml: (_, EscString(x)) -> x - ml: (_, Sum(x,y)) -> $[[x] + [y]] + ml: (_, Sum(x, y)) -> $[[x] + [y]] + ml: (_, Concat(x, y)) -> $[[x] ^ [y]] + ml: (Gets(_,yn), Gets(x, y)) -> $[([x]).getS ([(yn,y)])] ml: (To(xn,_), To(x, y)) -> $[let _fto = ([x]) in (ignore (([y]).getS ([(xn,"_fto")])); _fto)] + ml: (_, Emits(s)) -> $[[s].emitS ()] + ml: (_, Terminate(x)) -> x ml: (_, Sequence(l)) -> l diff --git a/trans/python.str b/trans/python.str index 6449417..f42b1cb 
100644 --- a/trans/python.str +++ b/trans/python.str @@ -8,6 +8,8 @@ rules def gets(self, v): print(v, file=sys.stdout, end='') return self + def emit(self): + return input() + "\n" # Python inconsistently strips when using input def to(data,strm): strm.gets(data) return data @@ -21,8 +23,10 @@ rules py: LitString(x) -> $[r[x]] py: EscString(x) -> x py: Sum(x,y) -> $[[x] + [y]] + py: Concat(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] + py: Emits(x) -> $[[x].emit()] py: Terminate(x) -> $[[x];] py: Sequence(l) -> l diff --git a/trans/statics.stx b/trans/statics.stx index 340a1df..ce99bf7 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -222,6 +222,13 @@ This pattern lets us specify error messages. type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. /* **/ + ty_Ex(Concat(e1, e2)) = STRING() :- + type_Ex(e1) == STRING() | error $[Expression [e1] not String in concat.]@e1, + type_Ex(e2) == STRING() | error $[Expression [e2] not String in concat.]@e2. + + ty_Ex(Emits(e)) = STRING() :- // At the moment, only stream is stdio + type_Ex(e) == STREAM() | error $[Only Streams may emit items.]@e. + /** md ```