From 2772fd0c5ce56a685eeb069faaab5752dda075e5 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Thu, 18 Feb 2021 04:17:05 +0000 Subject: [PATCH] Add literal string constants (#19) In addition, for the sake of Haskell code generation, this PR adds static typing with Statix. Resolves #5. Co-authored-by: Glen Whitney Reviewed-on: https://code.studioinfinity.org/glen/fostr/pulls/19 Co-Authored-By: Glen Whitney Co-Committed-By: Glen Whitney --- README.md | 3 +- bin/extract_tests.xsh | 4 +- editor/Analysis.esv | 1 + mkdocs.yml | 1 + signature/TYPE.str | 1 + signature/TYPE.stx | 7 ++ statics/util.stx | 7 ++ syntax/fostr.sdf3 | 21 +++- tests/basic.spt | 10 ++ tests/emit_thrice.fos | 2 +- tests/hw.fos | 1 + trans/analysis.str | 16 ++- trans/haskell.str | 86 +++++++++++---- trans/javascript.str | 12 ++ trans/python.str | 1 + trans/statics.stx | 247 +++++++++++++++++++++++++++++++++++++++++- 16 files changed, 386 insertions(+), 34 deletions(-) create mode 120000 signature/TYPE.str create mode 100644 signature/TYPE.stx create mode 100644 statics/util.stx create mode 100644 tests/hw.fos diff --git a/README.md b/README.md index 91e1605..0df4cba 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ language as possible to work in, given that I inevitably will be doing a bunch of coding. The language will be centrally organized around the concept of "streams" (somewhat in the spirit of [streem](https://github.com/matz/streem) and/or -[Orc](http://orc.csres.utexas.edu/index.shtml)). In fact all higher-type +[Orc](http://orc.csres.utexas.edu/index.shtml), or to a lesser extent, +[Sisal-is](https://github.com/parsifal-47/sisal-is)). In fact all higher-type entities will be cast in terms of streams, or in slogan form, "++f++unctions and (binary) ++o++perators are ++str++eams" (hence the name "fostr"). diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 79b2ca8..7cf131a 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -27,9 +27,9 @@ for path in TEST_LIST: if pfm: continue # skip examples that don't parse ntfm = re.search(r'\n\s*\]\].*?don.t.test', details) if ntfm: continue # explicit skip - em = re.search(r'\n\s*\]\]', details) + em = re.search(r'\n\]\]', details) if not em: continue - example = details[:em.start()+1] + example = details[:em.start()+1].replace('[[','').replace(']]','') expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) diff --git a/editor/Analysis.esv b/editor/Analysis.esv index 0667f1e..eb3197f 100644 --- a/editor/Analysis.esv +++ b/editor/Analysis.esv @@ -22,3 +22,4 @@ menus action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source) action: "Show analyzed AST" = debug-show-analyzed + action: "Show analyzed type" = debug-show-type diff --git a/mkdocs.yml b/mkdocs.yml index 6726b34..e316a98 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,6 +2,7 @@ site_name: fostr language nav: - README.md - tests/basic.md +- trans/statics.md - implementation.md plugins: diff --git a/signature/TYPE.str b/signature/TYPE.str new file mode 120000 index 0000000..332d8ef --- /dev/null +++ b/signature/TYPE.str @@ -0,0 +1 @@ +TYPE.stx \ No newline at end of file diff --git a/signature/TYPE.stx b/signature/TYPE.stx new file mode 100644 index 0000000..e299f8b --- /dev/null +++ b/signature/TYPE.stx @@ -0,0 +1,7 @@ +module signature/TYPE +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE diff --git a/statics/util.stx b/statics/util.stx new file mode 100644 index 0000000..7ce94e7 --- /dev/null +++ b/statics/util.stx @@ -0,0 +1,7 @@ +module statics/util +imports signature/TYPE + +rules + lastTYPE : list(TYPE) -> TYPE + lastTYPE([T]) = T. + lastTYPE([U | TS]) = lastTYPE(TS). diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 4a39131..4dc4569 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -8,6 +8,14 @@ context-free start-symbols Start +lexical sorts + + STRING_LITERAL + +lexical syntax + + STRING_LITERAL = "'"~[\']*"'" + context-free sorts Start LineSeq Line OptTermEx TermExLst TermEx Ex @@ -29,13 +37,14 @@ context-free syntax TermEx.Terminate = <;> - Ex.Int = INT - Ex.Stream = - Ex.Sum = [[Ex] + [Ex]] {left} - Ex.Gets = [[Ex] << [Ex]] {left} - Ex.To = [[Ex] >> [Ex]] {left} + Ex.Int = INT + Ex.LitString = STRING_LITERAL + Ex.Stream = + Ex.Sum = < + > {left} + Ex.Gets = [[Ex] << [Ex]] {left} + Ex.To = [[Ex] >> [Ex]] {left} - Ex = <()> {bracket} + Ex = <()> {bracket} context-free priorities diff --git a/tests/basic.spt b/tests/basic.spt index e3f4f69..e1cadf9 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,6 +1,16 @@ module basic language fostr +test hw1_type [[ +[[stream]] << [['Hello, world! ']] << [[3+2]] << ' times.' +]] +run get-type on #1 to STREAM() +run get-type on #2 to STRING() +run get-type on #3 to INT() +run get-type to STREAM() +/** writes +Hello, world! 5 times.**/ + /** md Title: A whirlwind tour of fostr diff --git a/tests/emit_thrice.fos b/tests/emit_thrice.fos index 6042aad..232e2be 100644 --- a/tests/emit_thrice.fos +++ b/tests/emit_thrice.fos @@ -1,4 +1,4 @@ - stream << 72 + 87 + stream << 'Some numbers: ' stream << 88 + 96 99 + 12 >> diff --git a/tests/hw.fos b/tests/hw.fos new file mode 100644 index 0000000..90fc0d4 --- /dev/null +++ b/tests/hw.fos @@ -0,0 +1 @@ +stream << 'Hello, world!' diff --git a/trans/analysis.str b/trans/analysis.str index 70919cc..e0c22ef 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,5 +1,4 @@ module analysis - imports statixruntime @@ -51,3 +50,18 @@ rules // Debugging debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result) with filename := path ; result := sel + + // Extract the type assigned to a node by Statix + get-type: node -> type + where + // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error) + a := node]])>; + // Gets the type of the given node (or throws an error) + type := node]])> node + + fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail + + // Prints the analyzed type of a selection. + debug-show-type: (sel, _, _, path, projp) -> (filename, result) + with filename := path + ; result := sel diff --git a/trans/haskell.str b/trans/haskell.str index eae335e..560b29c 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,15 +1,25 @@ module haskell -imports libstrategolib signatures/- util +imports libstrategolib signatures/- signature/TYPE util analysis rules - /* Approach: Generate code from the bottom up. - At every node, we create a pair of the implementation and - necessary preamble of IO actions. - We concatenate preambles as we go up. - Finally, at the toplevel we emit the preamble before returning the - final value. + /* Approach: + A) We will define a local transformation taking a term with value strings + at each child to a value string for the node. + B) We will append IO actions needed to set up for the value progressively + to a Preactions rule (mapping () to the list of actions). There will + be a utility `add-preaction` to append a new clause to value of this + rule. + C) We will use bottomup-para to traverse the full AST with the + transformation from A so that we have access to the original expression + (and get get the Statix-associated type when we need to). + Hence the transformation in (A) must actually take a pair of + an (original) term and a term with value strings at each child, + and be certain to return a value string. + + Finally, at the toplevel we emit the result of () before + returning the final value. */ - hs: TopLevel((c,p)) -> $[import System.IO + hs: (_, TopLevel(val)) -> $[import System.IO data IOStream = StdIO gets :: Show b => a -> b -> IO a @@ -17,27 +27,61 @@ rules putStr(show d) return s + getsStr :: a -> String -> IO a + getsStr s d = do + putStr(d) + return s + main = do - [p]return [c]] + [()]return [val]] - hs: Stream() -> ("StdIO", "") - hs: Int(x) -> (x, "") - hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) + hs: (_, Stream()) -> "StdIO" + hs: (_, Int(x)) -> x + hs: (_, LitString(x)) -> x + hs: (_, Sum(x, y)) -> $[([x] + [y])] - hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") - hsget: (s, x, p, v) -> (v, [p, $[[v] <- [s] `gets` [x]], - "\n"]) + hs: (Gets(_, xn), Gets(s, x)) -> v + with v := "_fostr_get" + ; [$[[v] <- [(s, xn, x)]]] + hs: (To(xn, _), To(x, s)) -> v + with v := "_fostr_to" + ; [$[let [v] = [x]], (s, xn, v)] - hs: To( (c, p), (d, q)) -> (c,d,(p,q),"fosto") - hsto: (x, s, p, v) -> (v, [p, $[let [v] = [x]], "\n", - $[[s] `gets` [v]], "\n"]) + hs_gets: (s, xn, x ) -> $[[s] [xn] [x]] + hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`") - hs: Terminate((c,p)) -> ($[[c];;], p) - hs: Sequence(l) -> (l, l) + hs: (_, Terminate(x)) -> $[[x];;] + hs: (_, Sequence(l)) -> l + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + hs: (_, []) -> [] + hs: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + hs: (x, x) -> x where x + + /* Characters we need to escape in Haskell string constants */ + Hascape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... */ + Hascape: [ 0 | cs ] -> ['\', '0' | cs ] + Hascape: [ 7 | cs ] -> ['\', 'a' | cs ] // Alert + Hascape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Hascape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies + haskLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Hascape)) + ; double-quote - haskell = bottomup(try(hs)) + haskell = rules(Preactions: () -> ""); bottomup-para(try(hs)) + + /* See "Approach" at top of file */ + add-preactions = newp := ((), ) + ; rules(Preactions: () -> newp) // Interface haskell code generation with editor services and file system to-haskell: (selected, _, _, path, project-path) -> (filename, result) diff --git a/trans/javascript.str b/trans/javascript.str index 605b46a..645cc63 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -13,13 +13,25 @@ rules js: Stream() -> $[Stdio] js: Int(x) -> x + js: LitString(x) -> x js: Sum(x,y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] js: Terminate(x) -> x js: Sequence(l) -> l + /* Characters we need to escape in Javascript string constants */ + Jscape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... */ + Jscape: [ 0 | cs ] -> ['\', '0' | cs ] + Jscape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Jscape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + javaLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Jscape)) + ; single-quote javascript = bottomup(try(js)) diff --git a/trans/python.str b/trans/python.str index 238c006..7aa4e06 100644 --- a/trans/python.str +++ b/trans/python.str @@ -15,6 +15,7 @@ rules py: Stream() -> $[Stdio] py: Int(x) -> x + py: LitString(x) -> $[r[x]] py: Sum(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] diff --git a/trans/statics.stx b/trans/statics.stx index 7772698..ef63c8d 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -1,14 +1,257 @@ module statics imports signatures/fostr-sig +imports signature/TYPE +imports statics/util -// see docs/implementation.md for details on how to switch to multi-file analysis +/** md +Title: Adding Program Analysis with Statix + +## Development of fostr static analysis + +This section is more documentation of Spoofax in general and Statix +in particular than of fostr itself, but is being maintained here in case +it could be either helpful to someone getting started with Statix or +helpful in understanding how the static characteristics of fostr were designed. + +As mentioned in the [Overview](../README.md), I don't like to program and a +corollary of that is never to use a facility unless/until there's a need for +it. So the first few rudimentary passes at fostr simply declared every program +to be "OK" from the point of view of Statix: +```statix +{! "\git docs/statix_start:trans/statics.stx" extract: + start: programOk + stop: (.*TopLevel.*) +!} +``` + +Then I reached the point at which the grammar was basically just +```SDF3 +// Start.TopLevel = +// Seq = +// Seq.Sequence = sq:Ex+ {layout(align-list sq)} +// Ex.Terminated = <;> +{! "\git docs/statix_start:syntax/fostr.sdf3" extract: + start: TermEx.Terminate + stop: (.*bracket.*) +!} +``` +(The first four clauses are in comments because they approximate fostr's +grammar; it actually uses a few more sorts for sequences of +expressions, to achieve fostr's exact layout rules. Also note that the parsing +of literal strings later evolved to include the surrounding single quotes, +because the rule above implicitly allows layout between the quotes and the +string contents, creating ambiguity.) + +This was the first point at which there were two different types that might +need to be written to standard output (Int and String), and although of course +the dynamically-typed Python and Javascript code generated dealt with both fine, +the Haskell code needed to differ depending on the +type of the item written (and I hadn't even started OCaml code generation at +that point since I knew it would be hopeless without statically typing fostr +programs). + +So it was time to bite the bullet and add type checking via Statix to fostr. +The first step was to replace the simple assertion that any TopLevel +is OK with a constraint that its Seq must type properly, and an assignment of +that type to the top level node: +```statix +programOk(tl@TopLevel(seq)) :- {T} + type_Seq(seq) == T, + @tl.type := T. +``` +Of course, for this to even parse, we must have a definition of `type_Seq`: +```statix +{! ../signature/TYPE.stx extract: {start: module, stop: rules} !} +**/ + +// see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point programOk : Start - programOk(TopLevel(_)). + /** md +rules + type_Seq : Seq -> TYPE +``` + **/ + + type_LineSeq : LineSeq -> TYPE + + programOk(tl@TopLevel(seq)) :- {T} + type_LineSeq(seq) == T, + @tl.type := T. + + /** md +Now to type a Seq, we look to the syntax, and see that there are two +possibilities for what it might be: just an Ex, or a Sequence(_) of a +list of 'Ex's. For the first, Statix does not allow one sort to simply +"become" another, but the Spoofax infrastructure automatically inserts +"injection" constructors for us, in this case one named Ex2Seq. So the +first rule for `type_Seq` is straightforward: + +```statix + type_Seq(s@Ex2Seq(e)) = T : - + type_Ex(e) == T, + @s.type := T. +``` +where of course type_Ex needs its own declaration analogous to the above. + **/ + + type_Line : Line -> TYPE + + type_LineSeq(ls@Line2LineSeq(l)) = T :- + type_Line(l) == T, + @ls.type := T. + + /** md + +The other (and in fact more typical) rule for `type_Seq`, when it actually +consists of a sequence of expressions, is a bit more involved. Fortunately +Statix provides a primitive for mapping over a list, so we can proceed as +follows: +```statix + types_Exs maps type_Ex(list(*)) = list(*) + type_Seq(s@Sequence(l)) = T :- {lt} + types_Exs(l) == lt, + lastTYPE(lt) == T, + @s.type := T. +``` +Here `lastTYPE` is a function that extracts the last TYPE from a list. +Unless/until Statix develops some sort of standard library, it must be +hand-defined, as done in "statics/util.stx" like so: +```statix +{! ../statics/util.stx extract: {start: lastTYPE} !} +``` + **/ + + types_Lines maps type_Line(list(*)) = list(*) + + type_LineSeq(ls@Sequence(l)) = T :- {lt} + types_Lines(l) == lt, + lastTYPE(lt) == T, + @ls.type := T. + + type_OptTermEx : OptTermEx -> TYPE + + type_Line(l@OptTermEx2Line(ote)) = T :- + type_OptTermEx(ote) == T, + @l.type := T. + + type_Ex : Ex -> TYPE + type_TermEx : TermEx -> TYPE + + type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- + type_Ex(e) == T, + @ote.type := T. + + type_OptTermEx(ote@TermEx2OptTermEx(te)) = T :- + type_TermEx(te) == T, + @ote.type := T. + + /** md + +This brings us to the syntax rules for the basic expressions themselves, +which comprise almost all of the remaining fostr language constructs. +But first a mechanism suggested by Ivo Wilms to avoid repeating the node +type annotation in every rule: +```statix + **/ + + /** md */ + ty_Ex : Ex -> TYPE + + type_Ex(e) = ty@ty_Ex(e) :- + @e.type := ty. + /* **/ + + /** md +``` +At this stage in fostr's development, there was no difference between a +terminated and unterminated expression, so the typing rule for that +constructor was trivial: +```statix + ty_Ex(Terminated(e)) = ty_Ex(e). +``` + **/ + + type_TermEx(te@Terminate(e)) = T :- + type_Ex(e) == T, + @te.type := T. + + /** md + +Now typing literals is straightforward: +```statix + **/ + + /** md */ + ty_Ex(Int(_)) = INT(). + ty_Ex(LitString(_)) = STRING(). + ty_Ex(e@Stream()) = STREAM(). + /* **/ + + /** md +``` + +Finally we get to the binary operators, and here we use the pattern found in +recent versions of the +"[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)" +example language and in the Fall 2020 TU-Delft class lecture on +[Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/). +This pattern lets us specify error messages. + +```statix + **/ + + /** md */ + ty_Ex(Sum(e1, e2)) = INT() :- + type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1, + type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2. + + ty_Ex(Gets(e1, e2)) = STREAM() :- {T} + type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1, + type_Ex(e2) == T. + + ty_Ex(To(e1, e2)) = T :- + type_Ex(e1) == T, + type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. + /* **/ + + /** md +``` + +### Using type annotations in transformation + +At this point, Statix properly types all of the valid programs of the very +rudimentary language defined by the grammar above. But the proximate purpose +for implementing this typing was to aid Haskell code generation. So how +do we actually use the assigned types in a Stratego transformation? + +Statix provides a Stratego api that includes, among other items, strategies +`stx-get-ast-analysis` and `stx-get-ast-type(|analysis)` that provide access +to the assigned types. However, it's easiest to use the information via +a wrapper like this, essentially lifted from the "chicago" language project: +```stratego +{! analysis.str extract: + start: Extract.the.type +terminate: Prints.the.analyzed.type +!} +``` + +Now `get_type` run on a node of the analyzed AST produces the assigned `TYPE` +(as an ATerm in the constructors of sort TYPE in Statix). + +Thus, you can select on the assigned type, as in the strategy to select +the correct Haskell operator to use to send an item to standard output: +```stratego +{! haskell.str extract: + start: '(.*hs_getOp.=.*)' + stop: \s +!} +``` + **/ rules // multi-file entry point