diff --git a/.drone.yml b/.drone.yml index 452f163..90b928b 100644 --- a/.drone.yml +++ b/.drone.yml @@ -29,7 +29,18 @@ steps: - java -jar /drone/lib/spt/org.metaborg.spt.cmd/target/org.metaborg.spt.cmd* -l . -s /drone/lib/spt/org.metaborg.meta.lang.spt -t tests - mkdir -p lib - curl -o lib/sunshine.jar -L 'http://artifacts.metaborg.org/service/local/artifact/maven/redirect?r=snapshots&g=org.metaborg&a=org.metaborg.sunshine2&v=LATEST' - - bin/fosgen tests/emit_sum.fos + - name: setup_gen + image: gcc + volumes: + - name: m2 + path: /root/.m2 + commands: + - git clone https://github.com/facebook/nailgun.git + - cd nailgun + - make + - cd ../bin + - ln -s ../nailgun/nailgun-client/target/ng . + - cd .. - name: extract_tests image: xonsh/xonsh commands: @@ -41,7 +52,8 @@ steps: path: /drone/lib - name: m2 path: /root/.m2 - commands: + commands: # Note we first make sure that fosgen is working + - bin/fosgen -d tests/emit_sum.fos - bin/generate_test_code - name: python_tests image: python:slim @@ -55,6 +67,13 @@ steps: image: haskell commands: - bin/run_tests runghc hs + - name: ocaml_tests + image: ocaml/opam + commands: + - ls -als tests/extracted + - opam init + - eval $(opam env) + - bin/run_tests ocaml ml volumes: - name: lib diff --git a/.gitignore b/.gitignore index 80a4a3c..35e6d95 100644 --- a/.gitignore +++ b/.gitignore @@ -12,10 +12,16 @@ .pydevproject +a.out + *.aterm /site +bin/ng tests/extracted/* tests/*.js tests/*.py tests/*.hs +tests/*.ml +tests/*.cmi +tests/*.cmo adhoc* diff --git a/README.md b/README.md index 91e1605..0df4cba 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,8 @@ language as possible to work in, given that I inevitably will be doing a bunch of coding. The language will be centrally organized around the concept of "streams" (somewhat in the spirit of [streem](https://github.com/matz/streem) and/or -[Orc](http://orc.csres.utexas.edu/index.shtml)). 
In fact all higher-type +[Orc](http://orc.csres.utexas.edu/index.shtml), or to a lesser extent, +[Sisal-is](https://github.com/parsifal-47/sisal-is)). In fact all higher-type entities will be cast in terms of streams, or in slogan form, "++f++unctions and (binary) ++o++perators are ++str++eams" (hence the name "fostr"). diff --git a/bin/extract_tests.xsh b/bin/extract_tests.xsh index 79b2ca8..422acb1 100644 --- a/bin/extract_tests.xsh +++ b/bin/extract_tests.xsh @@ -13,12 +13,16 @@ DESTINATION = 'tests/extracted' # Extension for extracted files: EXT = 'fos' +# Extension for desired input: +INP = 'in' + # Extension for expectations: EXP = 'expect' for path in TEST_LIST: destdir = pf"{DESTINATION}/{path.stem}" mkdir -p @(destdir) + chmod ugo+rwx @(destdir) contents = path.read_text() tests = re.split(r'test\s*(.+?)\s*\[\[.*?\n', contents)[1:] testit = iter(tests) @@ -27,12 +31,17 @@ for path in TEST_LIST: if pfm: continue # skip examples that don't parse ntfm = re.search(r'\n\s*\]\].*?don.t.test', details) if ntfm: continue # explicit skip - em = re.search(r'\n\s*\]\]', details) + em = re.search(r'\n\]\]', details) if not em: continue - example = details[:em.start()+1] + example = details[:em.start()+1].replace('[[','').replace(']]','') expath = destdir / f"{name}.{EXT}" expath.write_text(example) echo Wrote @(expath) + im = re.search(r'/\*\*\s+accepts.*?\n([\s\S]*?)\*\*/', details[em.end():]) + if im: + ipath = destdir / f"{name}.{INP}" + ipath.write_text(im[1]) + echo " ...and" @(ipath) xm = re.search(r'/\*\*\s+writes.*?\n([\s\S]*?)\*\*/', details[em.end():]) if xm: xpath = destdir / f"{name}.{EXP}" diff --git a/bin/fosgen b/bin/fosgen index c5af48b..b03656b 100755 --- a/bin/fosgen +++ b/bin/fosgen @@ -5,6 +5,7 @@ erro() { printf "%s\n" "$*" >&2; } ##### Set defaults: SUPPRESS_ERR=YES +USE_NAILGUN=YES LANGUAGE=Python ##### Extract command line options: @@ -14,18 +15,23 @@ do -h|--help) echo echo "Usage:" - echo " fosgen [-d] [-l LANGUAGE] FILE" + echo " fosgen [-d] 
[-j] [-l LANGUAGE] FILE" echo echo "Writes to standard output the code generated from the fostr" echo "program in FILE, targeting the specified LANGUAGE (which" echo "defaults to Python)." echo echo "The -d option writes diagnostic output to standard error." + echo "The -j option uses the Spoofax Sunshine JAR directly, rather" + echo "than via nailgun." exit ;; -d) SUPPRESS_ERR='' ;; + -j) + USE_NAILGUN='' + ;; -l) shift LANGUAGE="$1" @@ -67,5 +73,17 @@ then exec 2>/dev/null fi +if [[ $USE_NAILGUN ]] +then + if [[ $SUPPRESS_ERR ]] + then + $BINDIR/let_sun_shine + else + $BINDIR/let_sun_shine noisy + fi + $BINDIR/ng sunshine transform -p $PROJDIR -n $LANGUAGE -i $PROGRAM + exit $? +fi + java -jar $SUNJAR transform -p $PROJDIR -l $PROJDIR -l $MVN_REPO -n $LANGUAGE -i $PROGRAM exit $? diff --git a/bin/generate_test_code b/bin/generate_test_code index 2c443d6..0d151a8 100755 --- a/bin/generate_test_code +++ b/bin/generate_test_code @@ -4,7 +4,7 @@ failed=0 for dir in tests/extracted/*; do for file in $dir/*.fos; do - for language in Python Javascript Haskell; do + for language in Python Javascript Haskell OCaml; do echo bin/fosgen -l ${language%.*} $file ... bin/fosgen -l $language $file if [[ $? -ne 0 ]]; then diff --git a/bin/let_sun_shine b/bin/let_sun_shine new file mode 100755 index 0000000..cb3c67a --- /dev/null +++ b/bin/let_sun_shine @@ -0,0 +1,40 @@ +#!/bin/bash + +# Helper for fosgen, not intended to be used directly +# With an argument, print diagnostic output + +BINDIR=$(dirname $BASH_SOURCE) + +if $BINDIR/ng sunshine --help +then + if [[ $1 ]] + then + echo "sun already shining." + fi +else + if [[ $1 ]] + then + echo "disperse the clouds." + fi + SUNJAR="$BINDIR/../lib/sunshine.jar" + PROJDIR="$BINDIR/.." + if [[ ! $MVN_REPO ]]; then + MVN_REPO="$HOME/.m2/repository" + fi + if [[ ! -d $MVN_REPO ]]; then + MVN_REPO="/root/.m2/repository" + fi + if [[ ! -d $MVN_REPO ]]; then + echo "Cannot find your Maven repository. 
Please set environment variable" + echo "MVN_REPO to its full path and re-run." + exit 1 + fi + if [[ $1 ]] + then + java -jar $SUNJAR server & + else + java -jar $SUNJAR server >/dev/null 2>&1 & + fi + sleep 5 + $BINDIR/ng sunshine load -l $PROJDIR -l $MVN_REPO +fi diff --git a/bin/run_tests b/bin/run_tests index 90ccaa7..da49485 100755 --- a/bin/run_tests +++ b/bin/run_tests @@ -9,8 +9,14 @@ diffed=0 for dir in tests/extracted/*; do for file in $dir/*.$ext; do ((total++)) - $command $file > $file.out - if [[ $? -ne 0 ]]; then + if [[ -f ${file%.*}.in ]]; then + cat ${file%.*}.in | $command $file > $file.out + result=$? + else + $command $file > $file.out + result=$? + fi + if [[ $result -ne 0 ]]; then echo ERROR: $command $file failed. ((failed++)) else diff --git a/editor/Analysis.esv b/editor/Analysis.esv index 0667f1e..eb3197f 100644 --- a/editor/Analysis.esv +++ b/editor/Analysis.esv @@ -22,3 +22,4 @@ menus action: "Show pre-analyzed AST" = debug-show-pre-analyzed (source) action: "Show analyzed AST" = debug-show-analyzed + action: "Show analyzed type" = debug-show-type diff --git a/editor/Generation.esv b/editor/Generation.esv index 3d71fe2..9e5051c 100644 --- a/editor/Generation.esv +++ b/editor/Generation.esv @@ -4,3 +4,4 @@ menus action: "Python" = to-python action: "Javascript" = to-javascript action: "Haskell" = to-haskell + action: "OCaml" = to-ocaml diff --git a/mkdocs.yml b/mkdocs.yml index 6726b34..c2d5242 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -2,13 +2,14 @@ site_name: fostr language nav: - README.md - tests/basic.md +- trans/statics.md - implementation.md plugins: - search - semiliterate: ignore_folders: [target, lib] - exclude_extensions: ['.o', '.hi'] + exclude_extensions: ['.o', '.hi', '.cmi', '.cmo'] extract_standard_markdown: terminate: theme: diff --git a/signature/TYPE.str b/signature/TYPE.str new file mode 120000 index 0000000..332d8ef --- /dev/null +++ b/signature/TYPE.str @@ -0,0 +1 @@ +TYPE.stx \ No newline at end of file diff 
--git a/signature/TYPE.stx b/signature/TYPE.stx new file mode 100644 index 0000000..e299f8b --- /dev/null +++ b/signature/TYPE.stx @@ -0,0 +1,7 @@ +module signature/TYPE +signature + sorts TYPE // semantic type + constructors + INT : TYPE + STRING : TYPE + STREAM : TYPE diff --git a/statics/util.stx b/statics/util.stx new file mode 100644 index 0000000..7ce94e7 --- /dev/null +++ b/statics/util.stx @@ -0,0 +1,7 @@ +module statics/util +imports signature/TYPE + +rules + lastTYPE : list(TYPE) -> TYPE + lastTYPE([T]) = T. + lastTYPE([U | TS]) = lastTYPE(TS). diff --git a/syntax/fostr.sdf3 b/syntax/fostr.sdf3 index 4a39131..f782466 100644 --- a/syntax/fostr.sdf3 +++ b/syntax/fostr.sdf3 @@ -8,6 +8,14 @@ context-free start-symbols Start +lexical sorts + + STRING_LITERAL + +lexical syntax + + STRING_LITERAL = "'"~[\']*"'" + context-free sorts Start LineSeq Line OptTermEx TermExLst TermEx Ex @@ -29,18 +37,27 @@ context-free syntax TermEx.Terminate = <;> - Ex.Int = INT - Ex.Stream = - Ex.Sum = [[Ex] + [Ex]] {left} - Ex.Gets = [[Ex] << [Ex]] {left} - Ex.To = [[Ex] >> [Ex]] {left} + Ex.Int = INT + Ex.LitString = STRING_LITERAL + Ex.EscString = STRING + Ex.Stream = + Ex.Sum = < + > {left} + Ex.Concat = < ++ > {left} + Ex.Gets = [[Ex] << [Ex]] {left} + Ex.DefGets = [<<< [Ex]] + Ex.To = [[Ex] >> [Ex]] {left} + Ex.DefTo = [[Ex] >>>] + Ex.Emits = <!> + Ex.DefEmits = - Ex = <()> {bracket} + Ex = <()> {bracket} context-free priorities Ex.To - > Ex.Sum + > Ex.DefTo + > {Ex.Sum Ex.Concat} + > Ex.DefGets > Ex.Gets, // prevent cycle: no singletons diff --git a/tests/basic.spt b/tests/basic.spt index e3f4f69..3d5a4a6 100644 --- a/tests/basic.spt +++ b/tests/basic.spt @@ -1,20 +1,36 @@ module basic language fostr +test hw1_type [[ +[[stream]] << [['Hello, world! ']] << [[3+2]] << ' times.' +]] +run get-type on #1 to STREAM() +run get-type on #2 to STRING() +run get-type on #3 to INT() +run get-type to STREAM() +/** writes +Hello, world! 
5 times.**/ + /** md Title: A whirlwind tour of fostr ## Whirlwind tour -fostr is just in its infancy, so it's not yet even ready for -Hello, World. The best we can offer now is this little snippet -that writes the sum of the ASCII codes for 'H', 'W', and '!' to standard output: +There seems only to be one way to start a tour like this. So here goes: ```fostr **/ -/** md */ test emit_sum [[ -stream << 72 + 87 + 33 +/** md */ test hello_world [[ +<<< 'Hello, world!' ]] /* **/ +parse to TopLevel(DefGets(LitString("'Hello, world!'"))) +/** writes +Hello, world!**/ + +// Prior proto-hello-world, no longer in the tour. +test emit_sum [[ +stream << 72 + 87 + 33 +]] parse to TopLevel(Gets(Stream(), Sum(Sum(Int("72"), Int("87")), Int("33")))) /** writes 192**/ @@ -23,7 +39,7 @@ parse to TopLevel(Gets(Stream(), Sum(Sum(Int("72"), Int("87")), Int("33")))) ``` At the moment, there are only two ways to run a file containing fostr code -(you can find the above in `tests/emit_sum.fos`). They both start by +(you can find the above in `tests/hw.fos`). They both start by cloning this fostr project. Then, either: 1. Open the project in Eclipse and build it, visit your program file, @@ -36,30 +52,70 @@ cloning this fostr project. Then, either: For example, this snippet generates the following Python: ```python -{! ../tests/emit_sum.py extract: +{! ../tests/hw.py extract: start: 'Stdio\s=' !} ``` -(which writes "192" to standard output); it also generates identical code in -this simple example for -Javascript, although it generates a different preamble defining Stdio in each -case. (Haskell code generation is also currently supported.) +It generates nearly identical code in +this simple example for Javascript (just with `"Hello, world!"` +in place of `r'Hello, world!'`), although it generates a different +preamble defining Stdio for each language. (Currently, Haskell and OCaml +code generation are also supported.) 
+ +There's not much to break down in such a tiny program as this, but let's do +it. The prefix operator `<<<` could be read as "the default stream receives...", +and unsurprisingly in a main program the default stream is standard input and +output. And `'Hello, world!'` is a literal string constant; what you see is +what you get. The only detail to know is that such constants must occur +within a single line of your source file. So depending on how you +ran the program and how closely you looked at its output, +you may have noticed this program does not write a newline at the end +of its message. Nothing is ever implicitly sent to a stream. So if you want +newlines, you should switch to a (double-quoted) string that allows +the usual array of escape sequences: + +```fostr +**/ + +/** md */ test hello_esc_world [[ +<<< "Hello,\t\tworld!\n\n" +]] /* **/ +parse to TopLevel(DefGets(EscString("\"Hello,\t\tworld!\n\n\""))) +/** writes +Hello, world! + +**/ + +/** md +``` +(We threw in two of each so you could clearly see them in the output if +you run this program.) ### Everything has a value As mentioned in the [Introduction](../README.md), everything in a fostr program (including the entire program itself) is an expression and has -a value. So what's the value of that expression above? Well, appropriately -enough, `stream` is our -first example of a stream, and for convenience, the value of a stream -receiving an item is (usually) just the stream back again. The `<<` operator -is also left-associative, so that way we can chain insertions into a stream: +a value. So what's the value of that expression above? Well, for convenience, +the value of a stream receiving an item is (generally) just the stream back +again. 
That way we can use the general (left-associative) +`_stream_ << _value_` operator to chain insertions into a stream: ```fostr **/ -/** md */ test emit_twice [[ -stream << 72 + 87 + 33 << 291 +/** md */ test state_obvious [[ +<<< 'Two and ' << 2 << ' make ' << 2+2 << ".\n" ]] /* **/ +parse to TopLevel( + Gets(Gets(Gets(Gets(DefGets(LitString("'Two and '")),Int("2")), + LitString("' make '")),Sum(Int("2"),Int("2"))), + EscString("\".\n\""))) +/** writes +Two and 2 make 4. +**/ + +test emit_twice [[ +stream << 72 + 87 + 33 << 291 +]] parse to TopLevel( Gets(Gets(Stream(), Sum(Sum(Int("72"), Int("87")), Int("33"))), Int("291"))) /** writes @@ -67,24 +123,28 @@ parse to TopLevel( /** md ``` -Running this program produces a nice palindromic output: "192291". And because sometimes you want to emphasize the value and propagate that instead of the stream, you can also write these expressions "the other way" -with `>>`; both forms return the first argument, so the following writes "824": +with `>>>` for sending to the default stream or `>>` in general; these forms +(generally) return the value sent, so the following writes "824": ```fostr **/ /** md */ test enters_twice [[ -(7 + 8 >> stream + 9) >> stream +(7 + 8 >> stream + 9) >>> ]] /* **/ parse to TopLevel( - To(Sum(Sum(Int("7"), To(Int("8"), Stream())), Int("9")), Stream())) + DefTo(Sum(Sum(Int("7"), To(Int("8"), Stream())), Int("9")))) /** writes 824**/ /** md ``` +Two things are worth noting here: the default stream can always be referred to +directly via the identifier `stream`, and the precedences of `<<` and `>>` are +different so that generally full expressions go to a stream with `<<` but +just individual terms are sent with `>>`. 
### Layout in fostr @@ -94,13 +154,13 @@ lines are indented from the start of the initial line: **/ /** md */ test receive_enter_break [[ -stream << +<<< 7 - + 8 >> stream + + 8 >>> + 9 ]] /* **/ parse to TopLevel( - Gets(Stream(), Sum(Sum(Int("7"), To(Int("8"), Stream())), Int("9")))) + DefGets(Sum(Sum(Int("7"), DefTo(Int("8"))), Int("9")))) /** writes 824**/ @@ -111,8 +171,8 @@ parse to TopLevel( **/ /** md */ test enter_receive_bad_continuation [[ -(7 + 8 >> stream + 9) ->> (stream << 9 + 2) +(7 + 8 >>> + 9) +>> (<<< 9 + 2) ]] /* **/ parse fails @@ -135,16 +195,17 @@ lines are evaluated in sequence. For example, the program **/ /** md */ test emit_thrice [[ - stream << 72 + 87 - stream << 88 + <<< 72 + 87 + <<< 88 + 96 - 99 + 12 >> - stream + 99 + 12 + >>> + ]] /* **/ parse to TopLevel(Sequence([ - Gets(Stream(), Sum(Int("72"), Int("87"))), - Gets(Stream(), Sum(Int("88"), Int("96"))), - Sum(Int("99"), To(Int("12"), Stream())) + DefGets(Sum(Int("72"), Int("87"))), + DefGets(Sum(Int("88"), Int("96"))), + Sum(Int("99"), DefTo(Int("12"))) ])) /** writes 15918412**/ @@ -159,10 +220,10 @@ in sequence align at the left; e.g., the following fails to parse: **/ /** md */ test emit_thrice_bad_alignment [[ - stream << 72 + 87 -stream << 88 + <<< 72 + 87 +<<< 88 + 96 - 99 + 12 >> stream + 99 + 12 >>> ]] /* **/ parse fails @@ -177,23 +238,23 @@ are so terminated. 
So the following is OK: **/ /** md */ test emit_several [[ - stream << 1 + 2; 3 >> stream - (4 + 5) >> stream; stream << 6; - stream << 7 - stream << 8 + <<< 1 + 2; 3 >>> + (4 + 5) >>>; stream << 6; + <<< 7 + <<< 8 + (9+10); - 11 + 12 >> stream; 13 >> stream - >> stream + 11 + 12 >>>; 13 >>> + >>> ]] /* **/ parse to TopLevel(Sequence([ - ISequence(Prior([Terminate(Gets(Stream(), Sum(Int("1"), Int("2"))))]), - To(Int("3"), Stream())), - ISequence(Prior([Terminate(To(Sum(Int("4"), Int("5")), Stream()))]), + ISequence(Prior([Terminate(DefGets(Sum(Int("1"), Int("2"))))]), + DefTo(Int("3"))), + ISequence(Prior([Terminate(DefTo(Sum(Int("4"), Int("5"))))]), Terminate(Gets(Stream(), Int("6")))), - Gets(Stream(), Int("7")), - Terminate(Gets(Stream(), Sum(Int("8"), Sum(Int("9"), Int("10"))))), - ISequence(Prior([Terminate(Sum(Int("11"), To(Int("12"), Stream())))]), - To(To(Int("13"), Stream()), Stream())) + DefGets(Int("7")), + Terminate(DefGets(Sum(Int("8"), Sum(Int("9"), Int("10"))))), + ISequence(Prior([Terminate(Sum(Int("11"), DefTo(Int("12"))))]), + DefTo(DefTo(Int("13")))) ])) /** writes 3396727121313**/ @@ -221,3 +282,49 @@ run desugar-fostr to TopLevel(Sequence([ Terminate(Sum(Int("11"), To(Int("12"), Stream()))), To(To(Int("13"), Stream()), Stream()) ])) + +test emit_several_default [[ +<<< 1 + 2; 3 >>> +(4 + 5) >>> >> stream; stream << 6; +<<< 7 << 75 +<<< 8 + + (9+10); +11 + 12 >>>; 13 >>> + >>> +]] parse succeeds +/** writes +3399677527121313**/ + +/** md +### Streams are bidirectional + +So far we have only sent items to a stream. But we can extract them from +streams as well, with the `!` postfix operator. `!!` all by itself abbreviates +`stream!`, i.e., extraction from the standard stream. For example, + +```fostr +**/ + +/** md */ test custom_hw [[ +<<< "What is your name?\n" +<<< 'Hello, ' ++ !! 
+]] /* **/ +parse to TopLevel(Sequence([ + DefGets(EscString("\"What is your name?\n\"")), + DefGets(Concat(LitString("'Hello, '"),DefEmits())) +])) +/** accepts +Kilroy +**/ +/** writes +What is your name? +Hello, Kilroy +**/ + +/** md +``` + +queries users for their name and then writes a customized greeting. It also +illustrates the use of `++` for string concatenation, as opposed to `+` for +(numerical) addition. +**/ diff --git a/tests/emit_several.fos b/tests/emit_several.fos index c806fd3..e7cbac5 100644 --- a/tests/emit_several.fos +++ b/tests/emit_several.fos @@ -1,7 +1,7 @@ -stream << 1 + 2; 3 >> stream -(4 + 5) >> stream; stream << 6; -stream << 7 -stream << 8 +<<< 1 + 2; 3 >>> +(4 + 5) >>> >> stream; stream << 6; +<<< 7 << 75 +<<< 8 + (9+10); -11 + 12 >> stream; 13 >> stream - >> stream +11 + 12 >>>; 13 >>> + >>> diff --git a/tests/emit_thrice.fos b/tests/emit_thrice.fos index 6042aad..232e2be 100644 --- a/tests/emit_thrice.fos +++ b/tests/emit_thrice.fos @@ -1,4 +1,4 @@ - stream << 72 + 87 + stream << 'Some numbers: ' stream << 88 + 96 99 + 12 >> diff --git a/tests/hw.fos b/tests/hw.fos new file mode 100644 index 0000000..19eade4 --- /dev/null +++ b/tests/hw.fos @@ -0,0 +1 @@ +<<< 'Hello, world!' 
diff --git a/tests/hw2.fos b/tests/hw2.fos new file mode 100644 index 0000000..c177d31 --- /dev/null +++ b/tests/hw2.fos @@ -0,0 +1 @@ +<<< "Hello,\t\tworld!\n\n" diff --git a/trans/analysis.str b/trans/analysis.str index 70919cc..e0c22ef 100644 --- a/trans/analysis.str +++ b/trans/analysis.str @@ -1,5 +1,4 @@ module analysis - imports statixruntime @@ -51,3 +50,18 @@ rules // Debugging debug-show-analyzed: (sel, _, _, path, projp) -> (filename, result) with filename := path ; result := sel + + // Extract the type assigned to a node by Statix + get-type: node -> type + where + // Assigns variable a to be the result of the Statix analysis of the entire program (or throws an error) + a := node]])>; + // Gets the type of the given node (or throws an error) + type := node]])> node + + fail-msg(|msg) = err-msg(|$[get-type: [msg]]); fail + + // Prints the analyzed type of a selection. + debug-show-type: (sel, _, _, path, projp) -> (filename, result) + with filename := path + ; result := sel diff --git a/trans/desugar.str b/trans/desugar.str index e4f52cf..b22ebd8 100644 --- a/trans/desugar.str +++ b/trans/desugar.str @@ -13,10 +13,12 @@ rules */ deISe: ISequence(Prior(l),x) -> Sequence((l, [x])) - enList: x -> [x] - seqFlatten: Sequence(l) -> Sequence() <+ enList)>l) + seqFlatten: Sequence(l) -> Sequence() <+ ![])>l) + defStream: DefGets(x) -> Gets(Stream(), x) + defStream: DefTo(x) -> To(x, Stream()) + defStream: DefEmits() -> Emits(Stream()) + strategies - desugar-fostr = bottomup(try(deISe <+ seqFlatten)) - \ No newline at end of file + desugar-fostr = bottomup(try(defStream <+ deISe <+ seqFlatten)) diff --git a/trans/fostr.str b/trans/fostr.str index d9a585a..91194d6 100644 --- a/trans/fostr.str +++ b/trans/fostr.str @@ -6,6 +6,7 @@ imports pp outline analysis + ocaml haskell javascript python diff --git a/trans/haskell.str b/trans/haskell.str index eae335e..0e0eb15 100644 --- a/trans/haskell.str +++ b/trans/haskell.str @@ -1,43 +1,99 @@ module haskell -imports 
libstrategolib signatures/- util +imports libstrategolib signatures/- signature/TYPE util analysis rules - /* Approach: Generate code from the bottom up. - At every node, we create a pair of the implementation and - necessary preamble of IO actions. - We concatenate preambles as we go up. - Finally, at the toplevel we emit the preamble before returning the - final value. + /* Approach: + A) We will define a local transformation taking a term with value strings + at each child to a value string for the node. + B) We will append IO actions needed to set up for the value progressively + to a Preactions rule (mapping () to the list of actions). There will + be a utility `add-preaction` to append a new clause to value of this + rule. + C) We will use bottomup-para to traverse the full AST with the + transformation from A so that we have access to the original expression + (and can get the Statix-associated type when we need to). + Hence the transformation in (A) must actually take a pair of + an (original) term and a term with value strings at each child, + and be certain to return a value string. + + Finally, at the toplevel we emit the result of () before + returning the final value. 
*/ - hs: TopLevel((c,p)) -> $[import System.IO + hs: (_, TopLevel(val)) -> $[-- Preamble from fostr + import System.IO data IOStream = StdIO + -- Danger: These currently assume the stream is StdIO gets :: Show b => a -> b -> IO a gets s d = do putStr(show d) return s + getsStr :: a -> String -> IO a + getsStr s d = do + putStr(d) + return s + + emit s = do + l <- getLine + return (l ++ "\n") + main = do - [p]return [c]] + [()]return [val]] - hs: Stream() -> ("StdIO", "") - hs: Int(x) -> (x, "") - hs: Sum( (c, p), (d, q)) -> ($[([c] + [d])], (p,q)) + hs: (_, Stream()) -> "StdIO" + hs: (_, Int(x)) -> x + hs: (_, LitString(x)) -> x + hs: (_, EscString(x)) -> x + hs: (_, Sum(x, y)) -> $[([x] + [y])] + hs: (_, Concat(x, y)) -> $[([x] ++ [y])] - hs: Gets((c, p), (d, q)) -> (c,d,(p,q),"fosgt") - hsget: (s, x, p, v) -> (v, [p, $[[v] <- [s] `gets` [x]], - "\n"]) + hs: (Gets(_, xn), Gets(s, x)) -> v + with v := "_fostr_get" + ; [$[[v] <- [(s, xn, x)]]] + hs: (To(xn, _), To(x, s)) -> v + with v := "_fostr_to" + ; [$[let [v] = [x]], (s, xn, v)] - hs: To( (c, p), (d, q)) -> (c,d,(p,q),"fosto") - hsto: (x, s, p, v) -> (v, [p, $[let [v] = [x]], "\n", - $[[s] `gets` [v]], "\n"]) + hs_gets: (s, xn, x ) -> $[[s] [xn] [x]] + hs_getOp = get-type; (?STRING() < !"`getsStr`" + !"`gets`") - hs: Terminate((c,p)) -> ($[[c];;], p) - hs: Sequence(l) -> (l, l) + hs: (_, Emits(s)) -> v + with v := "_fostr_emitted" + ; [$[[v] <- emit [s]]] + + hs: (_, Terminate(x)) -> $[[x];;] + hs: (_, Sequence(l)) -> l + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + hs: (_, []) -> [] + hs: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + hs: (x, x) -> x where x + + /* Characters we need to escape in Haskell string constants */ + Hascape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... 
*/ + Hascape: [ 0 | cs ] -> ['\', '0' | cs ] + Hascape: [ 7 | cs ] -> ['\', 'a' | cs ] // Alert + Hascape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Hascape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Hascape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed strategies + haskLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Hascape)) + ; double-quote - haskell = bottomup(try(hs)) + haskell = rules(Preactions: () -> ""); bottomup-para(try(hs)) + + /* See "Approach" at top of file */ + add-preactions = newp := ((), ) + ; rules(Preactions: () -> newp) // Interface haskell code generation with editor services and file system to-haskell: (selected, _, _, path, project-path) -> (filename, result) diff --git a/trans/javascript.str b/trans/javascript.str index 605b46a..9b596ad 100644 --- a/trans/javascript.str +++ b/trans/javascript.str @@ -2,24 +2,57 @@ module javascript imports libstrategolib signatures/- util rules - js: TopLevel(x) -> $[const Stdio = { - gets: v => { process.stdout.write(String(v)); return Stdio; }, + js: TopLevel(x) -> $[// Fostr preamble + const _fostr_readline = require('readline'); + const _fostr_events = require('events'); + const _fostr_rl = _fostr_readline.createInterface({input: process.stdin}); + const Stdio = { + gets: v => { process.stdout.write(String(v)); return Stdio; }, + emit: async () => { + const [line] = await _fostr_events.once(_fostr_rl, 'line'); + return line + "\n"; } } function to(data, strm) { strm.gets(data); return data; } - [x]] + + const _fostr_body = async () => { + // End of preamble + + [x] + + // Fostr coda + _fostr_rl.close() + } + _fostr_body(); + ] + with line := "[line]" js: Stream() -> $[Stdio] js: Int(x) -> x - js: Sum(x,y) -> $[[x] + [y]] + js: LitString(x) -> x + js: EscString(x) -> x + js: Sum(x, y) -> $[[x] + [y]] + js: Concat(x, y) -> $[[x] + [y]] js: Gets(x, y) -> $[[x].gets([y])] js: To(x, y) -> $[to([x],[y])] + js: Emits(x) -> $[(await [x].emit())] js: Terminate(x) -> x js: 
Sequence(l) -> l + /* Characters we need to escape in Javascript string constants */ + Jscape: ['\t' | cs ] -> ['\', 't' | cs ] + /* I think I can just use ASCII constants for characters... */ + Jscape: [ 0 | cs ] -> ['\', '0' | cs ] + Jscape: [ 8 | cs ] -> ['\', 'b' | cs ] // Backspace + Jscape: [ 11 | cs ] -> ['\', 'v' | cs ] // Vertical tab + Jscape: [ 12 | cs ] -> ['\', 'f' | cs ] // Form feed + strategies + javaLitString = un-single-quote + ; string-as-chars(escape-chars(Escape <+ Jscape)) + ; single-quote javascript = bottomup(try(js)) diff --git a/trans/ocaml.str b/trans/ocaml.str new file mode 100644 index 0000000..049dbf4 --- /dev/null +++ b/trans/ocaml.str @@ -0,0 +1,66 @@ +module ocaml +imports libstrategolib signatures/- util signature/TYPE analysis + +/* Note will use bottomup-para to traverse the full AST so that + we have access to the original expression (and can get the + Statix-associated type when we need to). + + This means that every one of our local rules must take a pair + of an original term and a term with every child replaced by + its generated code. 
+ */ + +rules + ml: (_, TopLevel(x)) -> $[(* fostr preamble *) + type stream = { getS: string -> stream; emitS: unit -> string } + let rec stdio = { + getS = (fun s -> print_string s; stdio); + emitS = (fun () -> (read_line ()) ^ "\n"); + };; + (* End of preamble *) + + [x]] + + ml: (_, Stream()) -> $[stdio] + ml: (_, Int(x)) -> x + ml: (_, LitString(x)) -> $[{|[x]|}] + ml: (_, EscString(x)) -> x + ml: (_, Sum(x, y)) -> $[[x] + [y]] + ml: (_, Concat(x, y)) -> $[[x] ^ [y]] + + ml: (Gets(_,yn), Gets(x, y)) + -> $[([x]).getS ([(yn,y)])] + ml: (To(xn,_), To(x, y)) + -> $[let _fto = ([x]) in (ignore (([y]).getS ([(xn,"_fto")])); _fto)] + ml: (_, Emits(s)) -> $[[s].emitS ()] + + ml: (_, Terminate(x)) -> x + ml: (_, Sequence(l)) -> l + + ml_seq: [x] -> x + ml_seq: [x | xs ] -> $[ignore ([x]); +[xs]] + + /* One drawback of using paramorphism is we have to handle lists + explicitly: + */ + ml: (_, []) -> [] + ml: (_, [x | xs]) -> [x | xs] + + /* Another drawback of using paramorphism is at the very leaves we have + to undouble the tuple: + */ + ml: (x, x) -> x where x + + ml_str: (node, code) -> $[[node]([code])] + +strategies + + ml_string_cast = get-type; (?INT() < !"string_of_int" + !"") + + ocaml = bottomup-para(try(ml)) + + // Interface ocaml code generation with editor services and file system + to-ocaml: (selected, _, _, path, project-path) -> (filename, result) + with filename := path + ; result := selected diff --git a/trans/python.str b/trans/python.str index 238c006..f42b1cb 100644 --- a/trans/python.str +++ b/trans/python.str @@ -2,22 +2,31 @@ module python imports libstrategolib signatures/- util rules - py: TopLevel(x) -> $[import sys + py: TopLevel(x) -> $[## Fostr preamble + import sys class StdioC: def gets(self, v): print(v, file=sys.stdout, end='') return self + def emit(self): + return input() + "\n" # Python inconsistently strips when using input def to(data,strm): strm.gets(data) return data Stdio = StdioC() + ## End of preamble + [x]] py: Stream() -> 
$[Stdio] py: Int(x) -> x + py: LitString(x) -> $[r[x]] + py: EscString(x) -> x py: Sum(x,y) -> $[[x] + [y]] + py: Concat(x,y) -> $[[x] + [y]] py: Gets(x, y) -> $[[x].gets([y])] py: To(x, y) -> $[to([x],[y])] + py: Emits(x) -> $[[x].emit()] py: Terminate(x) -> $[[x];] py: Sequence(l) -> l diff --git a/trans/statics.stx b/trans/statics.stx index 7772698..ce99bf7 100644 --- a/trans/statics.stx +++ b/trans/statics.stx @@ -1,14 +1,267 @@ module statics imports signatures/fostr-sig +imports signature/TYPE +imports statics/util -// see docs/implementation.md for details on how to switch to multi-file analysis +/** md +Title: Adding Program Analysis with Statix + +## Development of fostr static analysis + +This section is more documentation of Spoofax in general and Statix +in particular than of fostr itself, but is being maintained here in case +it could be either helpful to someone getting started with Statix or +helpful in understanding how the static characteristics of fostr were designed. + +As mentioned in the [Overview](../README.md), I don't like to program and a +corollary of that is never to use a facility unless/until there's a need for +it. So the first few rudimentary passes at fostr simply declared every program +to be "OK" from the point of view of Statix: +```statix +{! "\git docs/statix_start:trans/statics.stx" extract: + start: programOk + stop: (.*TopLevel.*) +!} +``` + +Then I reached the point at which the grammar was basically just +```SDF3 +// Start.TopLevel = +// Seq = +// Seq.Sequence = sq:Ex+ {layout(align-list sq)} +// Ex.Terminated = <;> +{! "\git docs/statix_start:syntax/fostr.sdf3" extract: + start: TermEx.Terminate + stop: (.*bracket.*) +!} +``` +(The first four clauses are in comments because they approximate fostr's +grammar; it actually uses a few more sorts for sequences of +expressions, to achieve fostr's exact layout rules. 
Also note that the parsing +of literal strings later evolved to include the surrounding single quotes, +because the rule above implicitly allows layout between the quotes and the +string contents, creating ambiguity.) + +This was the first point at which there were two different types that might +need to be written to standard output (Int and String), and although of course +the dynamically-typed Python and Javascript code generated dealt with both fine, +the Haskell code needed to differ depending on the +type of the item written (and I hadn't even started OCaml code generation at +that point since I knew it would be hopeless without statically typing fostr +programs). + +So it was time to bite the bullet and add type checking via Statix to fostr. +The first step was to replace the simple assertion that any TopLevel +is OK with a constraint that its Seq must type properly, and an assignment of +that type to the top level node: +```statix +programOk(tl@TopLevel(seq)) :- {T} + type_Seq(seq) == T, + @tl.type := T. +``` +Of course, for this to even parse, we must have a definition of `type_Seq`: +```statix +{! ../signature/TYPE.stx extract: {start: module, stop: rules} !} +**/ + +// see docs/implementation.md for detail on how to switch to multi-file analysis rules // single-file entry point programOk : Start - programOk(TopLevel(_)). + /** md +rules + type_Seq : Seq -> TYPE +``` + **/ + + type_LineSeq : LineSeq -> TYPE + + programOk(tl@TopLevel(seq)) :- {T} + type_LineSeq(seq) == T, + @tl.type := T. + + /** md +Now to type a Seq, we look to the syntax, and see that there are two +possibilities for what it might be: just an Ex, or a Sequence(_) of a +list of 'Ex's. For the first, Statix does not allow one sort to simply +"become" another, but the Spoofax infrastructure automatically inserts +"injection" constructors for us, in this case one named Ex2Seq. 
So the +first rule for `type_Seq` is straightforward: + +```statix + type_Seq(s@Ex2Seq(e)) = T :- + type_Ex(e) == T, + @s.type := T. +``` +where of course type_Ex needs its own declaration analogous to the above. + **/ + + type_Line : Line -> TYPE + + type_LineSeq(ls@Line2LineSeq(l)) = T :- + type_Line(l) == T, + @ls.type := T. + + /** md + +The other (and in fact more typical) rule for `type_Seq`, when it actually +consists of a sequence of expressions, is a bit more involved. Fortunately +Statix provides a primitive for mapping over a list, so we can proceed as +follows: +```statix + types_Exs maps type_Ex(list(*)) = list(*) + type_Seq(s@Sequence(l)) = T :- {lt} + types_Exs(l) == lt, + lastTYPE(lt) == T, + @s.type := T. +``` +Here `lastTYPE` is a function that extracts the last TYPE from a list. +Unless/until Statix develops some sort of standard library, it must be +hand-defined, as done in "statics/util.stx" like so: +```statix +{! ../statics/util.stx extract: {start: lastTYPE} !} +``` + **/ + + types_Lines maps type_Line(list(*)) = list(*) + + type_LineSeq(ls@Sequence(l)) = T :- {lt} + types_Lines(l) == lt, + lastTYPE(lt) == T, + @ls.type := T. + + type_OptTermEx : OptTermEx -> TYPE + + type_Line(l@OptTermEx2Line(ote)) = T :- + type_OptTermEx(ote) == T, + @l.type := T. + + type_Ex : Ex -> TYPE + type_TermEx : TermEx -> TYPE + + type_OptTermEx(ote@Ex2OptTermEx(e)) = T :- + type_Ex(e) == T, + @ote.type := T. + + type_OptTermEx(ote@TermEx2OptTermEx(te)) = T :- + type_TermEx(te) == T, + @ote.type := T. + + /** md + +This brings us to the syntax rules for the basic expressions themselves, +which comprise almost all of the remaining fostr language constructs. +But first a mechanism suggested by Ivo Wilms to avoid repeating the node +type annotation in every rule: +```statix + **/ + + /** md */ + ty_Ex : Ex -> TYPE + + type_Ex(e) = ty@ty_Ex(e) :- + @e.type := ty.
+ /* **/ + + /** md +``` +At this stage in fostr's development, there was no difference between a +terminated and unterminated expression, so the typing rule for that +constructor was trivial: +```statix + ty_Ex(Terminated(e)) = ty_Ex(e). +``` + **/ + + type_TermEx(te@Terminate(e)) = T :- + type_Ex(e) == T, + @te.type := T. + + /** md + +Now typing literals is straightforward: +```statix +{! "\git docs/statix_works:trans/statics.stx" extract: + start: '(.*ty_Ex.Int.*\s*)' + stop: '/. ../' +!} +``` + **/ + + ty_Ex(Int(_)) = INT(). + ty_Ex(LitString(_)) = STRING(). + ty_Ex(EscString(_)) = STRING(). + ty_Ex(e@Stream()) = STREAM(). + + /** md + +Finally we get to the binary operators, and here we use the pattern found in +recent versions of the +"[chicago](https://github.com/MetaBorgCube/statix-sandbox/tree/master/chicago)" +example language and in the Fall 2020 TU-Delft class lecture on +[Name Binding and Name Resolution](https://tudelft-cs4200-2020.github.io/lectures/2020/09/24/lecture5/). +This pattern lets us specify error messages. + +```statix + **/ + + /** md */ + ty_Ex(Sum(e1, e2)) = INT() :- + type_Ex(e1) == INT() | error $[Expression [e1] not an Int in sum.]@e1, + type_Ex(e2) == INT() | error $[Expression [e2] not an Int in sum.]@e2. + + ty_Ex(Gets(e1, e2)) = STREAM() :- {T} + type_Ex(e1) == STREAM() | error $[Only Streams may receive items.]@e1, + type_Ex(e2) == T. + + ty_Ex(To(e1, e2)) = T :- + type_Ex(e1) == T, + type_Ex(e2) == STREAM() | error $[Items may only be sent to Streams.]@e2. + /* **/ + + ty_Ex(Concat(e1, e2)) = STRING() :- + type_Ex(e1) == STRING() | error $[Expression [e1] not String in concat.]@e1, + type_Ex(e2) == STRING() | error $[Expression [e2] not String in concat.]@e2. + + ty_Ex(Emits(e)) = STRING() :- // At the moment, only stream is stdio + type_Ex(e) == STREAM() | error $[Only Streams may emit items.]@e. 
+ + /** md +``` + +### Using type annotations in transformation + +At this point, Statix properly types all of the valid programs of the very +rudimentary language defined by the grammar above. But the proximate purpose +for implementing this typing was to aid Haskell code generation. So how +do we actually use the assigned types in a Stratego transformation? + +Statix provides a Stratego API that includes, among other items, strategies +`stx-get-ast-analysis` and `stx-get-ast-type(|analysis)` that provide access +to the assigned types. However, it's easiest to use the information via +a wrapper like this, essentially lifted from the "chicago" language project: +```stratego +{! analysis.str extract: + start: Extract.the.type +terminate: Prints.the.analyzed.type +!} +``` + +Now `get_type` run on a node of the analyzed AST produces the assigned `TYPE` +(as an ATerm in the constructors of sort TYPE in Statix). + +Thus, you can select on the assigned type, as in the strategy to select +the correct Haskell operator to use to send an item to standard output: +```stratego +{! haskell.str extract: + start: '(.*hs_getOp.=.*)' + stop: \s +!} +``` + **/ rules // multi-file entry point