2021-02-18 04:17:05 +00:00
16 changed files with 386 additions and 34 deletions
--- a/README.md
+++ b/README.md
@ -11,7 +11,8 @@ language as possible to work in, given that I inevitably will be doing a
 bunch of coding. The language will be centrally organized around the
 concept of "streams" (somewhat in the spirit of
 [streem](https://github.com/matz/streem) and/or
-[Orc](http://orc.csres.utexas.edu/index.shtml)). In fact all higher-type
+[Orc](http://orc.csres.utexas.edu/index.shtml), or to a lesser extent,
 [Sisal-is](https://github.com/parsifal-47/sisal-is)). In fact all higher-type
 entities will be cast in terms of streams, or in slogan form, "++f++unctions
 and (binary) ++o++perators are ++str++eams" (hence the name "fostr").
--- a/bin/extract_tests.xsh
+++ b/bin/extract_tests.xsh
@ -27,9 +27,9 @@ for path in TEST_LIST:
    if pfm: continue   # skip examples that don't parse
    ntfm = re.search(r'\n\s*\]\].*?don.t.test', details)
    if ntfm: continue  # explicit skip
-    em = re.search(r'\n\s*\]\]', details)
+    em = re.search(r'\n\]\]', details)
    if not em: continue
-    example = details[:em.start()+1]
+    example = details[:em.start()+1].replace('[[','').replace(']]','')
    expath = destdir / f"{name}.{EXT}"
    expath.write_text(example)
    echo Wrote @(expath)
--- a/editor/Analysis.esv
+++ b/editor/Analysis.esv
@ -22,3 +22,4 @@ menus
    action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source)
    action: "Show analyzed AST"     = debug-show-analyzed
    action: "Show analyzed type"    = debug-show-type
--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -2,6 +2,7 @@ site_name: fostr language
 nav:
 - README.md
 - tests/basic.md
 - trans/statics.md
 - implementation.md
 plugins:
--- a/signature/TYPE.str
+++ b/signature/TYPE.str
@ -0,0 +1 @@
 TYPE.stx
--- a/signature/TYPE.stx
+++ b/signature/TYPE.stx
@ -0,0 +1,7 @@
 module signature/TYPE
 signature
  sorts TYPE // semantic type
  constructors
    INT    : TYPE
    STRING : TYPE
    STREAM : TYPE
--- a/statics/util.stx
+++ b/statics/util.stx
@ -0,0 +1,7 @@
 module statics/util
 imports signature/TYPE
 rules
  lastTYPE : list(TYPE) -> TYPE
  lastTYPE([T]) = T.
  lastTYPE([U | TS]) = lastTYPE(TS).
--- a/syntax/fostr.sdf3
+++ b/syntax/fostr.sdf3
@ -8,6 +8,14 @@ context-free start-symbols
  Start
 lexical sorts
  STRING_LITERAL
 lexical syntax
  STRING_LITERAL = "'"~[\']*"'"
 context-free sorts
  Start LineSeq Line OptTermEx TermExLst TermEx Ex
@ -30,8 +38,9 @@ context-free syntax
  TermEx.Terminate = <<Ex>;>
  Ex.Int       = INT
  Ex.LitString = STRING_LITERAL
  Ex.Stream    = <stream>
-  Ex.Sum      = [[Ex] +  [Ex]] {left}
+  Ex.Sum       = <<Ex> +  <Ex>> {left}
  Ex.Gets      = [[Ex] << [Ex]] {left}
  Ex.To        = [[Ex] >> [Ex]] {left}
--- a/tests/basic.spt
+++ b/tests/basic.spt
@ -1,6 +1,16 @@
 module basic
 language fostr
 test hw1_type [[
 [[stream]] << [['Hello,	world! ']] << [[3+2]] << ' times.'
 ]]
 run get-type on #1 to STREAM()
 run get-type on #2 to STRING()
 run get-type on #3 to INT()
 run get-type to STREAM()
 /** writes
 Hello,	world! 5 times.**/
 /** md
 Title: A whirlwind tour of fostr
--- a/tests/emit_thrice.fos
+++ b/tests/emit_thrice.fos
@ -1,4 +1,4 @@
-  stream << 72 + 87
+  stream << 'Some numbers: '
  stream << 88
    + 96
  99 + 12 >>
--- a/tests/hw.fos
+++ b/tests/hw.fos
@ -0,0 +1 @@
 stream << 'Hello,	world!'
--- a/trans/analysis.str
+++ b/trans/analysis.str
@ -1,5 +1,4 @@
 module analysis
 imports
  statixruntime
@ -51,3 +50,18 @@ rules // Debugging
  debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result)
    with filename := <guarantee-extension(|"analyzed.aterm")> path
       ; result   := sel
  // Extract the type assigned to a node by Statix
  get-type: node -> type
    where
      // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error)
      a := <stx-get-ast-analysis <+ fail-msg(|$[no analysis on node [<strip-annos;write-to-string> node]])>;
      // Gets the type of the given node (or throws an error)
      type := <stx-get-ast-type(|a) <+ fail-msg(|$[no type on node [<strip-annos;write-to-string> node]])> node
  fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail
  // Prints the analyzed type of a selection.
  debug-show-type: (sel, _, _, path, projp) -> (filename, result)
    with filename := <guarantee-extension(|"type.aterm")> path
       ; result   := <get-type> sel
--- a/trans/haskell.str
+++ b/trans/haskell.str
@ -1,15 +1,25 @@
 module haskell
-imports libstrategolib signatures/- util
+imports libstrategolib signatures/- signature/TYPE util analysis
 rules
-  /* Approach: Generate code from the bottom up.
+  /* Approach:
-     At every node, we create a pair of the implementation and
+     A) We will define a local transformation taking a term with value strings
-     necessary preamble of IO actions.
+        at each child to a value string for the node.
-     We concatenate preambles as we go up.
+     B) We will append IO actions needed to set up for the value progressively
-     Finally, at the toplevel we emit the preamble before returning the
+        to a Preactions rule (mapping () to the list of actions). There will
-     final value.
+        be a utility `add-preaction` to append a new clause to value of this
        rule.
     C) We will use bottomup-para to traverse the full AST with the
        transformation from A so that we have access to the original expression
        (and get get the Statix-associated type when we need to).
        Hence the transformation in (A) must actually take a pair of
        an (original) term and a term with value strings at each child,
        and be certain to return a value string.
     Finally, at the toplevel we emit the result of <Preactions>() before
     returning the final value.
   */
-  hs: TopLevel((c,p)) -> $[import System.IO
+  hs: (_, TopLevel(val)) -> $[import System.IO
  data IOStream = StdIO
  gets :: Show b => a -> b -> IO a
@ -17,27 +27,61 @@ rules
    putStr(show d)
    return s
  getsStr :: a -> String -> IO a
  getsStr s d = do
    putStr(d)
    return s
  main = do
-    [p]return [c]]
+    [<Preactions>()]return [val]]
-  hs: Stream()             -> ("StdIO", "")
+  hs: (_, Stream())     -> "StdIO"
-  hs: Int(x)               -> (x, "")
+  hs: (_, Int(x))       -> x
-  hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], <conc-strings>(p,q))
+  hs: (_, LitString(x)) -> <haskLitString>x
  hs: (_, Sum(x, y))    -> $[([x] + [y])]
-  hs: Gets((c, p), (d, q)) -> <hsget>(c,d,<conc-strings>(p,q),<newname>"fosgt")
+  hs: (Gets(_, xn), Gets(s, x)) -> v
-  hsget: (s, x, p, v) -> (v, <concat-strings>[p, $[[v] <- [s] `gets` [x]],
+    with v := <newname>"_fostr_get"
-                                              "\n"])
+       ; <add-preactions>[$[[v] <- [<hs_gets>(s, xn, x)]]]
  hs: (To(xn, _), To(x, s))     -> v
    with v := <newname>"_fostr_to"
       ; <add-preactions>[$[let [v] = [x]], <hs_gets>(s, xn, v)]
-  hs: To(  (c, p), (d, q)) -> <hsto>(c,d,<conc-strings>(p,q),<newname>"fosto")
+  hs_gets: (s, xn, x ) -> $[[s] [<hs_getOp>xn] [x]]
-  hsto: (x, s, p, v) -> (v, <concat-strings>[p, $[let [v] = [x]], "\n",
+  hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`")
                                             $[[s] `gets` [v]], "\n"])
-  hs: Terminate((c,p))     -> ($[[c];;], p)
+  hs: (_, Terminate(x)) -> $[[x];;]
-  hs: Sequence(l)          -> (<last; Fst>l, <map(Snd); concat-strings>l)
+  hs: (_, Sequence(l))  -> <last>l
  /* One drawback of using paramorphism is we have to handle lists
     explicitly:
   */
  hs: (_, []) -> []
  hs: (_, [x | xs]) -> [x | xs]
  /* Another drawback of using paramorphism is at the very leaves we have
     to undouble the tuple:
   */
  hs: (x, x) -> x where <is-string>x
  /* Characters we need to escape in Haskell string constants */
  Hascape: ['\t' | cs ] -> ['\', 't'  | cs ]
  /* I think I can just use ASCII constants for characters... */
  Hascape: [ 0   | cs ] -> ['\', '0'  | cs ]
  Hascape: [ 7   | cs ] -> ['\', 'a'  | cs ] // Alert
  Hascape: [ 8   | cs ] -> ['\', 'b'  | cs ] // Backspace
  Hascape: [ 11  | cs ] -> ['\', 'v'  | cs ] // Vertical tab
  Hascape: [ 12  | cs ] -> ['\', 'f'  | cs ] // Form feed
 strategies
  haskLitString = un-single-quote
                ; string-as-chars(escape-chars(Escape <+ Hascape))
                ; double-quote
-  haskell = bottomup(try(hs))
+  haskell = rules(Preactions: () -> ""); bottomup-para(try(hs))
  /* See "Approach" at top of file */
  add-preactions = newp := <conc-strings>(<Preactions>(), <lines>)
                 ; rules(Preactions: () -> newp)
  // Interface haskell code generation with editor services and file system
  to-haskell: (selected, _, _, path, project-path) -> (filename, result)
--- a/trans/javascript.str
+++ b/trans/javascript.str
@ -13,13 +13,25 @@ rules
  js: Stream()     -> $[Stdio]
  js: Int(x)       -> x
  js: LitString(x) -> <javaLitString>x
  js: Sum(x,y)     -> $[[x] + [y]]
  js: Gets(x, y)   -> $[[x].gets([y])]
  js: To(x, y)     -> $[to([x],[y])]
  js: Terminate(x) -> x
  js: Sequence(l)  -> <join(|";\n")>l
  /* Characters we need to escape in Javascript string constants */
  Jscape: ['\t' | cs ] -> ['\', 't'  | cs ]
  /* I think I can just use ASCII constants for characters... */
  Jscape: [ 0   | cs ] -> ['\', '0'  | cs ]
  Jscape: [ 8   | cs ] -> ['\', 'b'  | cs ] // Backspace
  Jscape: [ 11  | cs ] -> ['\', 'v'  | cs ] // Vertical tab
  Jscape: [ 12  | cs ] -> ['\', 'f'  | cs ] // Form feed
 strategies
  javaLitString = un-single-quote
                ; string-as-chars(escape-chars(Escape <+ Jscape))
                ; single-quote
  javascript = bottomup(try(js))
--- a/trans/python.str
+++ b/trans/python.str
@ -15,6 +15,7 @@ rules
  py: Stream()     -> $[Stdio]
  py: Int(x)       -> x
  py: LitString(x) -> $[r[x]]
  py: Sum(x,y)     -> $[[x] + [y]]
  py: Gets(x, y)   -> $[[x].gets([y])]
  py: To(x, y)     -> $[to([x],[y])]
--- a/trans/statics.stx
+++ b/trans/statics.stx
@ -1,14 +1,257 @@
 module statics
 imports signatures/fostr-sig
 imports signature/TYPE
 imports statics/util
-// see docs/implementation.md for details on how to switch to multi-file analysis
+/** md
 Title: Adding Program Analysis with Statix
 ## Development of fostr static analysis
 This section is more documentation of Spoofax in general and Statix
 in particular than of fostr itself, but is being maintained here in case
 it could be either helpful to someone getting started with Statix or
 helpful in understanding how the static characteristics of fostr were designed.
 As mentioned in the [Overview](../README.md), I don't like to program and a
 corollary of that is never to use a facility unless/until there's a need for
 it. So the first few rudimentary passes at fostr simply declared every program
 to be "OK" from the point of view of Statix:
 ```statix
 {! "\git docs/statix_start:trans/statics.stx" extract:
  start: programOk
  stop: (.*TopLevel.*)
 !}
 ```
 Then I reached the point at which the grammar was basically just
 ```SDF3
 // Start.TopLevel = <Seq>
 // Seq            = <Ex>
 // Seq.Sequence   = sq:Ex+ {layout(align-list sq)}
 // Ex.Terminated  = <<Ex>;>
 {! "\git docs/statix_start:syntax/fostr.sdf3" extract:
  start: TermEx.Terminate
  stop: (.*bracket.*)
 !}
 ```
 (The first four clauses are in comments because they approximate fostr's
 grammar; it actually uses a few more sorts for sequences of
 expressions, to achieve fostr's exact layout rules. Also note that the parsing
 of literal strings later evolved to include the surrounding single quotes,
 because the rule above implicitly allows layout between the quotes and the
 string contents, creating ambiguity.)
 This was the first point at which there were two different types that might
 need to be written to standard output (Int and String), and although of course
 the dynamically-typed Python and Javascript code generated dealt with both fine,
 the Haskell code needed to differ depending on the
 type of the item written (and I hadn't even started OCaml code generation at
 that point since I knew it would be hopeless without statically typing fostr
 programs).
 So it was time to bite the bullet and add type checking via Statix to fostr.
 The first step was to replace the simple assertion that any TopLevel
 is OK with a constraint that its Seq must type properly, and an assignment of
 that type to the top level node:
 ```statix
 programOk(tl@TopLevel(seq)) :- {T}
  type_Seq(seq) == T,
  @tl.type := T.
 ```
 Of course, for this to even parse, we must have a definition of `type_Seq`:
 ```statix
 {! ../signature/TYPE.stx extract: {start: module, stop: rules} !}
 **/
 // see docs/implementation.md for detail on how to switch to multi-file analysis
 rules // single-file entry point
  programOk : Start
-  programOk(TopLevel(_)).
+  /** md
 rules
  type_Seq : Seq -> TYPE
 ```
  **/
  type_LineSeq : LineSeq -> TYPE
  programOk(tl@TopLevel(seq)) :- {T}
    type_LineSeq(seq) == T,
    @tl.type := T.
  /** md
 Now to type a Seq, we look to the syntax, and see that there are two
 possibilities for what it might be: just an Ex, or a Sequence(_) of a
 list of 'Ex's. For the first, Statix does not allow one sort to simply
 "become" another, but the Spoofax infrastructure automatically inserts
 "injection" constructors for us, in this case one named Ex2Seq. So the
 first rule for `type_Seq` is straightforward:
 ```statix
  type_Seq(s@Ex2Seq(e)) = T : -
    type_Ex(e) == T,
    @s.type := T.
 ```
 where of course type_Ex needs its own declaration analogous to the above.
  **/
  type_Line : Line -> TYPE
  type_LineSeq(ls@Line2LineSeq(l)) = T :-
    type_Line(l) == T,
    @ls.type := T.
  /** md
 The other (and in fact more typical) rule for `type_Seq`, when it actually
 consists of a sequence of expressions, is a bit more involved. Fortunately
 Statix provides a primitive for mapping over a list, so we can proceed as
 follows:
 ```statix
  types_Exs maps type_Ex(list(*)) = list(*)
  type_Seq(s@Sequence(l)) = T :- {lt}
    types_Exs(l) == lt,
    lastTYPE(lt) == T,
    @s.type := T.
 ```
 Here `lastTYPE` is a function that extracts the last TYPE from a list.
 Unless/until Statix develops some sort of standard library, it must be
 hand-defined, as done in "statics/util.stx" like so:
 ```statix
 {! ../statics/util.stx extract: {start: lastTYPE} !}
 ```
  **/
  types_Lines maps type_Line(list(*)) = list(*)
  type_LineSeq(ls@Sequence(l)) = T :- {lt}
    types_Lines(l) == lt,
    lastTYPE(lt) == T,
    @ls.type := T.
  type_OptTermEx : OptTermEx -> TYPE
  type_Line(l@OptTermEx2Line(ote)) = T :-
    type_OptTermEx(ote) == T,
    @l.type := T.
  type_Ex     :     Ex -> TYPE
  type_TermEx : TermEx -> TYPE
  type_OptTermEx(ote@Ex2OptTermEx(e)) = T :-
    type_Ex(e) == T,
    @ote.type := T.
  type_OptTermEx(ote@TermEx2OptTermEx(te)) = T :-
    type_TermEx(te) == T,
    @ote.type := T.
  /** md
 This brings us to the syntax rules for the basic expressions themselves,
 which comprise almost all of the remaining fostr language constructs.
 But first a mechanism suggested by Ivo Wilms to avoid repeating the node
 type annotation in every rule:
 ```statix
  **/
  /** md */
  ty_Ex : Ex -> TYPE
  type_Ex(e) = ty@ty_Ex(e) :-
    @e.type := ty.
  /* **/
  /** md
 ```
 At this stage in fostr's development, there was no difference between a
 terminated and unterminated expression, so the typing rule for that
 constructor was trivial:
 ```statix
  ty_Ex(Terminated(e)) = ty_Ex(e).
 ```
  **/
  type_TermEx(te@Terminate(e)) = T :-
    type_Ex(e) == T,
    @te.type := T.
  /** md
 Now typing literals is straightforward:
 ```statix
  **/
  /** md */
  ty_Ex(Int(_))       = INT().
  ty_Ex(LitString(_)) = STRING().
  ty_Ex(e@Stream())   = STREAM().
  /* **/
  /** md
 ```
 Finally we get to the binary operators, and here we use the pattern found in
 recent versions of the
 "[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)"
 example language and in the Fall 2020 TU-Delft class lecture on
 [Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/).
 This pattern lets us specify error messages.
 ```statix
  **/
  /** md */
  ty_Ex(Sum(e1, e2)) = INT() :-
    type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1,
    type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2.
  ty_Ex(Gets(e1, e2)) = STREAM() :- {T}
    type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1,
    type_Ex(e2) == T.
  ty_Ex(To(e1, e2)) = T :-
    type_Ex(e1) == T,
    type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2.
  /* **/
  /** md
 ```
 ### Using type annotations in transformation
 At this point, Statix properly types all of the valid programs of the very
 rudimentary language defined by the grammar above. But the proximate purpose
 for implementing this typing was to aid Haskell code generation. So how
 do we actually use the assigned types in a Stratego transformation?
 Statix provides a Stratego api that includes, among other items, strategies
 `stx-get-ast-analysis` and `stx-get-ast-type(|analysis)` that provide access
 to the assigned types. However, it's easiest to use the information via
 a wrapper like this, essentially lifted from the "chicago" language project:
 ```stratego
 {! analysis.str extract:
  start: Extract.the.type
 terminate: Prints.the.analyzed.type
 !}
 ```
 Now `get_type` run on a node of the analyzed AST produces the assigned `TYPE`
 (as an ATerm in the constructors of sort TYPE in Statix).
 Thus, you can select on the assigned type, as in the strategy to select
 the correct Haskell operator to use to send an item to standard output:
 ```stratego
 {! haskell.str extract:
  start: '(.*hs_getOp.=.*)'
  stop: \s
 !}
 ```
  **/
 rules // multi-file entry point