From a56efa0c916eb98d9ab1fe9f0935674440394c26 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Mon, 5 Aug 2019 16:45:04 -0400 Subject: [PATCH] Prepare for parsing expressions into parse trees Prior to this change, every evaluation re-parses the expression. The intermediate form as a parse tree is never stored anywhere. This change adds a new token type, FUNCALL, which acts as a node in a parse tree, and adds an (untested) evaluation method which returns the unevaluated parse tree as opposed to evaluating as it goes. This is preparation for a following step in which rather than storing a token sequence for each variety of content, teapot will store a single token representing the parse tree, allowing for quicker evaluation when the time comes; no actual parsing will have to occur on evaluation. --- src/common/eval.c | 126 ++++++++--- src/common/eval.h | 3 +- src/common/func.c | 189 ++++++++++++++-- src/common/main.c | 4 +- src/common/parser.c | 505 +++++++++++++++++++++++++++++-------------- src/common/parser.h | 6 +- src/common/scanner.c | 19 +- src/common/scanner.h | 17 +- src/common/sheet.c | 4 +- 9 files changed, 646 insertions(+), 227 deletions(-) diff --git a/src/common/eval.c b/src/common/eval.c index b3ccbca..b324d58 100644 --- a/src/common/eval.c +++ b/src/common/eval.c @@ -122,6 +122,7 @@ void tvecfreetoks(Token **tvec) } /*}}}*/ + /* tvecfree -- free a vector of pointer to tokens entirely */ /*{{{*/ void tvecfree(Token **tvec) { @@ -240,6 +241,40 @@ Token tadd(Token l, Token r) return result; } /*}}}*/ + +/* tconcat -- concat operands as strings */ /*{{{*/ +Token tconcat(Token l, Token r) +{ + /* variables */ /*{{{*/ + static int conc_buf_len = 1024; + Token result; + char buf[conc_buf_len]; + const char *buferr = _("Internal string concatenation buffer too small"); + int len; + /*}}}*/ + + if (l.type==EEK) return tcopy(l); + if (r.type==EEK) return tcopy(r); + len = printtok(buf, conc_buf_len, 0, 0, DEF_SCIENTIFIC, def_precision, 0, &l); + if (len > conc_buf_len - 2) + { + duperror(&result, buferr); + return result; + } + len += printtok(buf + len, conc_buf_len - len - 1, 0, 0, + DEF_SCIENTIFIC, def_precision, 0, &r); + if (len > conc_buf_len - 2) + { + duperror(&result, buferr); + return result; + } + buf[len] = '\0'; + result.type = STRING; + result.u.string = strdup(buf); + return result; +} +/*}}}*/ + /* tsub -- binary - operator */ /*{{{*/ Token tsub(Token l, Token r) { @@ -512,54 +547,45 @@ Token tmul(Token l, Token r) int len; /*}}}*/ - if (l.type==EEK) - /* return left error */ /*{{{*/ - result=tcopy(l); - /*}}}*/ - else if (r.type==EEK) - /* return right error */ /*{{{*/ - result=tcopy(r); - /*}}}*/ - else if (l.type==INT && r.type==INT) + if (l.type == EEK) result = tcopy(l); + else if (r.type == EEK) result = tcopy(r); + else if (l.type == INT && r.type == INT) /* result is int product of left int and right int */ /*{{{*/ { - result=l; - result.u.integer=l.u.integer*r.u.integer; + result = l; + result.u.integer = l.u.integer*r.u.integer; } /*}}}*/ - else if (l.type==FLOAT && r.type==FLOAT) + else if (l.type == FLOAT && r.type == FLOAT) /* result is float product of left float and right float */ /*{{{*/ { - result.type=FLOAT; - result.u.flt=l.u.flt*r.u.flt; + result.type = FLOAT; + result.u.flt = l.u.flt*r.u.flt; } /*}}}*/ - else if ((l.type==EMPTY && r.type==INT) || (l.type==INT && r.type==EMPTY)) + else if ((l.type == EMPTY && r.type == INT) + || (l.type == INT && r.type == EMPTY)) /* result is 0 */ /*{{{*/ { - result.type=INT; - result.u.integer=0; + result.type = INT; + result.u.integer = 0; } /*}}}*/ - else if ((l.type==EMPTY && r.type==FLOAT) || (l.type==FLOAT && r.type==EMPTY)) + else if ((l.type == EMPTY && r.type == FLOAT) + || (l.type == FLOAT && r.type == EMPTY)) /* result is 0.0 */ /*{{{*/ { - result.type=FLOAT; - result.u.flt=0.0; + result.type = FLOAT; + result.u.flt = 0.0; } /*}}}*/ - else if (l.type==INT && r.type==FLOAT) - /* result is float product of left int and right float */ /*{{{*/ + else if ((l.type == INT && r.type == FLOAT) + || (l.type == FLOAT && r.type==INT)) + /* result is float product of int and float */ /*{{{*/ { - result.type=FLOAT; - result.u.flt=((double)l.u.integer)*r.u.flt; - } - /*}}}*/ - else if (l.type==FLOAT && r.type==INT) - /* result is float product of left float and right int */ /*{{{*/ - { - result.type=FLOAT; - result.u.flt=l.u.flt*((double)r.u.integer); + result.type = FLOAT; + if (l.type == INT) result.u.flt = ((double)l.u.integer) * r.u.flt; + else result.u.flt = l.u.flt * ((double)r.u.integer); } /*}}}*/ else if (l.type==EMPTY && r.type==EMPTY) @@ -585,6 +611,39 @@ Token tmul(Token l, Token r) result.u.integer += l.u.location[len] * r.u.location[len]; } /*}}}*/ + else if ((l.type == INT && r.type == STRING) + || (l.type == STRING && r.type == INT)) + /* result is n copies of string concatenated together */ /*{{{*/ + { + int copies; + char *pat; + char *newval = NULL; + if (l.type == INT) + { + copies = l.u.integer; pat = strdup(r.u.string); + } else { + copies = r.u.integer; pat = strdup(l.u.string); + } + if (copies == 0) result.type = EMPTY; + else + { + size_t len = strlen(pat); + if (copies < 0) /* negative coefficient means reverse string */ + { + char *tmp = strdup(pat); + int j = 0; + for (int i = len - 1; i >= 0; --i, ++j) tmp[j] = pat[i]; + free(pat); + pat = tmp; + copies = -copies; + } + result.type = STRING; + result.u.string = malloc(len * copies + 1); + for (size_t c = 0; c < copies; ++c) + strcpy(result.u.string + c*len, pat); + result.u.string[copies*len] = '\0'; + } + } else /* result is product type error */ /*{{{*/ { @@ -745,10 +804,11 @@ Token tpow(Token l, Token r) return result; } /*}}}*/ + /* tfuncall -- function operator */ /*{{{*/ -Token tfuncall(Token *ident, int argc, Token argv[]) +Token tfuncall(int fident, int argc, Token argv[]) { - return tfunc[ident->u.fident].func(argc, argv); + return tfunc[fident].func(argc, argv); } /*}}}*/ diff --git a/src/common/eval.h b/src/common/eval.h index 2b035f6..50c2eaf 100644 --- a/src/common/eval.h +++ b/src/common/eval.h @@ -14,9 +14,10 @@ Token tdiv(Token l, Token r); Token tmod(Token l, Token r); Token tmul(Token l, Token r); Token tadd(Token l, Token r); +Token tconcat(Token l, Token r); Token tsub(Token l, Token r); Token tneg(Token x); -Token tfuncall(Token *ident, int argc, Token argv[]); +Token tfuncall(int fident, int argc, Token argv[]); Token tlt(Token l, Token r); Token tle(Token l, Token r); Token tge(Token l, Token r); diff --git a/src/common/func.c b/src/common/func.c index 1e1e999..cb84bce 100644 --- a/src/common/func.c +++ b/src/common/func.c @@ -305,11 +305,6 @@ static double deg2rad(double x) } /*}}}*/ -static void duperror(Token* tok, const char* erro) { - tok->type = EEK; - tok->u.err = strdup(erro); -} - typedef enum {ABSOLUTE, RELATIVE, EXCEL} LocConvention; static Token excel_adr_func(int argc, const Token argv[]); @@ -561,7 +556,7 @@ static Token eval_func(int argc, const Token argv[]) if (LOC_WITHIN(upd_sheet, argv[0].u.location)) contents = getcont(CELL_AT(upd_sheet,argv[0].u.location), CONTINGENT); if (contents == EMPTY_TVEC) result.type = EMPTY; - else result=eval(contents); + else result = eval(contents, FULL); } /*}}}*/ else duperror(&result, _("Usage: eval(location)")); @@ -653,17 +648,53 @@ static Token string_func(int argc, const Token argv[]) } /*}}}*/ +/* plus */ /*{{{*/ +static Token concat_func(int argc, const Token argv[]) +{ + Token result, tmp; + /* Try to add up the args */ + result.type = EMPTY; + for (size_t i = 0; i < argc; ++i) + { + tmp = tconcat(result, argv[i]); + tfree(&result); + result = tmp; + if (result.type == EEK) return result; + } + return result; +} +/*}}}*/ + +/* plus */ /*{{{*/ +static Token plus_func(int argc, const Token argv[]) +{ + Token result, tmp; + /* Try to add up the args */ + result.type = EMPTY; + if (argc > 0) result = tcopy(argv[0]); + for (size_t i = 1; i < argc; ++i) + { + tmp = tadd(result, argv[i]); + tfree(&result); + result = tmp; + if (result.type == EEK) return result; + } + return result; +} +/*}}}*/ + /* sum */ /*{{{*/ static Token sum_func(int argc, const Token argv[]) { Token result; const char *usage = _("Usage: sum(loc_start, loc_end)|sum(val1, val2,...)"); - if (argc == 0) { + if (argc <= 0) { duperror(&result, usage); return result; } - if (argc==2 && argv[0].type==LOCATION && argv[1].type==LOCATION) /* result is sum */ /*{{{*/ + if (argc == 2 && argv[0].type == LOCATION && argv[1].type == LOCATION) + /* result is sum of entries in range */ /*{{{*/ { /* variables */ /*{{{*/ Location w; @@ -675,7 +706,7 @@ static Token sum_func(int argc, const Token argv[]) x1=argv[0].u.location[0]; x2=argv[1].u.location[0]; posorder(&x1,&x2); y1=argv[0].u.location[1]; y2=argv[1].u.location[1]; posorder(&y1,&y2); z1=argv[0].u.location[2]; z2=argv[1].u.location[2]; posorder(&z1,&z2); - result.type=EMPTY; + result.type = EMPTY; for (w[X]=x1; w[X]<=x2; ++(w[X])) for (w[Y]=y1; w[Y]<=y2; ++(w[Y])) for (w[Z]=z1; w[Z]<=z2; ++(w[Z])) @@ -688,19 +719,9 @@ static Token sum_func(int argc, const Token argv[]) result=tmp; if (result.type==EEK) return result; } - } else { - Token tmp; - /* Try to add up the args */ - result = tcopy(argv[0]); - for (size_t i = 1; i < argc; ++i) - { - tmp = tadd(result, argv[i]); - tfree(&result); - result = tmp; - if (result.type == EEK) return result; - } + return result; } - return result; + return plus_func(argc, argv); } /*}}}*/ @@ -743,6 +764,103 @@ static Token n_func(int argc, const Token argv[]) } /*}}}*/ +/* mul */ /*{{{*/ +static Token mul_func(int argc, const Token argv[]) +{ + Token result, tmp; + /* Try to multiply up the args */ + result.type = EMPTY; + if (argc > 0) result = tcopy(argv[0]); + for (size_t i = 1; i < argc; ++i) + { + tmp = tmul(result, argv[i]); + tfree(&result); + result = tmp; + if (result.type == EEK) return result; + } + return result; +} +/*}}}*/ + +/* pow */ /*{{{*/ +static Token pow_func(int argc, const Token argv[]) +{ + Token result, tmp; + /* Try to power up the args, ((a^b)^c)^d etc */ + result.type = EMPTY; + if (argc > 0) result = tcopy(argv[0]); + for (size_t i = 1; i < argc; ++i) + { + tmp = tpow(result, argv[i]); + tfree(&result); + result = tmp; + if (result.type == EEK) return result; + } + return result; +} +/*}}}*/ + +/* binop_func -- handles all binary operations as function calls */ +static Token binop_func(int argc, const Token argv[], + Token (*tfunc)(Token, Token)) /*{{{*/ +{ + if (argc == 2) return tfunc(argv[0], argv[1]); + Token err; + duperror(&err, _("Binary infix op as function requires exactly 2 args")); + return err; +} +/*}}}*/ + +static Token minus_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tsub); +} + +static Token div_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tdiv); +} + +static Token le_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tle); +} + +static Token ge_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tge); +} + +static Token lt_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tlt); +} + +static Token gt_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tgt); +} + +static Token isequal_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, teq); +} + +static Token abouteq_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tabouteq); +} + +static Token ne_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tne); +} + +static Token mod_func(int argc, const Token argv[]) +{ + return binop_func(argc, argv, tmod); +} + /* int */ /*{{{*/ static Token int_func(int argc, const Token argv[]) { @@ -1334,7 +1452,19 @@ static Token time_func(int argc, const Token argv[]) return result; } /*}}}*/ - + +/* negate -- unary - */ +static Token negate_func(int argc, const Token argv[]) +{ + if (argc != 1) + { + Token err; + duperror(&err, _("Usage: -EXPR|negate(expr)")); + return err; + } + return tneg(argv[0]); +} + /* table of functions */ /*{{{*/ /* The order of these entries has no influence on performance, but to stay compatible, new entries should be appended. */ @@ -1387,6 +1517,21 @@ Tfunc tfunc[]= { "D", rel_adr_func }, { "X", excel_at_func }, { "X&", excel_adr_func }, + { "negate", negate_func }, + { "+", plus_func }, + { "-", minus_func }, + { "*", mul_func }, + { "/", div_func }, + { "<=", le_func }, + { ">=", ge_func }, + { "<", lt_func }, + { ">", gt_func }, + { "==", isequal_func }, + { "~=", abouteq_func }, + { "!=", ne_func }, + { "^", pow_func }, + { "%", mod_func }, + { "concat", concat_func }, { "", (Token (*)(int, const Token[]))0 } }; /*}}}*/ diff --git a/src/common/main.c b/src/common/main.c index 23f51d6..28c9dbf 100644 --- a/src/common/main.c +++ b/src/common/main.c @@ -1507,8 +1507,8 @@ static int do_goto(Sheet *sheet, const char *expr) Token value; LOCATION_GETS(upd_l, sheet->cur); - upd_sheet=sheet; - value=eval(t); + upd_sheet = sheet; + value = eval(t, FULL); tvecfree(t); if (value.type==LOCATION && IN_OCTANT(value.u.location)) movetoloc(sheet, value.u.location); diff --git a/src/common/parser.c b/src/common/parser.c index c9063bb..5dd6679 100644 --- a/src/common/parser.c +++ b/src/common/parser.c @@ -31,119 +31,145 @@ extern char *strdup(const char* s); /*}}}*/ /* prototypes */ /*{{{*/ -static Token term(Token *n[], int *i); +static Token term(Token *n[], int *i, EvalMethod meth); /*}}}*/ /* primary -- parse and evaluate a primary term */ /*{{{*/ -static Token primary(Token *n[], int *i) +static Token primary(Token *n[], int *i, EvalMethod meth) { /* variables */ /*{{{*/ - int argc,j; + int argc; + int fident = -2; Token *ident,argv[MAXARGC],result; /*}}}*/ - if (n[*i]==(Token*)0) + if (n[*i] == NULLTOKEN) /* error */ /*{{{*/ { - result.type=EEK; - result.u.err=strcpy(malloc(strlen(_("missing operator"))+1),_("missing operator")); + duperror(&result, _("missing operator")); return result; } /*}}}*/ - else switch (n[*i]->type) + switch (n[*i]->type) { /* STRING, FLOAT, INT */ /*{{{*/ case STRING: case FLOAT: case INT: - { return tcopy(*n[(*i)++]); + /*}}}*/ + + /* LIDENT */ /*{{{*/ + case LIDENT: + { + ident = n[*i]; + ++(*i); + if (meth == FULL) return findlabel(upd_sheet,ident->u.lident); + return tcopy(*ident); } /*}}}*/ + /* OPERATOR */ /*{{{*/ case OPERATOR: { - if (n[*i]->u.op==OP) - /* return paren term */ /*{{{*/ + switch (n[*i]->u.op) { + case OP: /* return paren term */ /*{{{*/ ++(*i); - result=term(n,i); - if (result.type==EEK) return result; - if (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==CP) - { + result = term(n, i, meth); + if (result.type == EEK) return result; + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op==CP) + { ++(*i); return result; } tfree(&result); - result.type=EEK; - result.u.err=strcpy(malloc(strlen(_(") expected"))+1),_(") expected")); + duperror(&result, _(") expected")); return result; - } - /*}}}*/ - else if (n[*i]->u.op==MINUS) - /* return negated term */ /*{{{*/ + /*}}}*/ + case MINUS: /* return negated term */ /*{{{*/ { ++(*i); - return(tneg(primary(n,i))); + if (meth == FULL) return tneg(primary(n, i, meth)); + Token arg = primary(n, i, meth); + if (arg.type == EEK) return arg; + result.type = FUNCALL; + result.u.funcall.fident = identcode("negate", 6); + result.u.funcall.argc = 1; + result.u.funcall.argv = malloc(sizeof(Token)); + result.u.funcall.argv[0] = arg; + return result; } - /*}}}*/ - else - /* return error, value expected */ /*{{{*/ - { - result.type=EEK; - result.u.err=strdup(_("value expected")); - return result; + case CP: + duperror(&result, _("Extra umatched ')'")); + return result; + case COMMA: + duperror(&result, _("Occurrence of ',' outside parameter list")); + return result; + default: + /* Can also use any infix symbol as a function, but only with parens, not + bare */ + if (n[(*i)+1] == NULLTOKEN || n[(*i)+1]->type != OPERATOR + || n[(*i)+1]->u.op != OP) + { + const char *templ = "To use %s as function symbol, must use %s(...)"; + result.type = EEK; + result.u.err = malloc(strlen(templ) + 2 * MAX_OP_NAME_LENGTH + 1); + sprintf(result.u.err, templ, + Op_Name[n[*i]->u.op], Op_Name[n[*i]->u.op]); + return result; + } + fident = identcode(Op_Name[n[*i]->u.op], strlen(Op_Name[n[*i]->u.op])); + /* FALL THROUGH TO PROCESS OPERATOR AS FUNCTION CALL */ } - /*}}}*/ - } - /*}}}*/ - /* LIDENT */ /*{{{*/ - case LIDENT: - { - ident=n[*i]; - ++(*i); - return findlabel(upd_sheet,ident->u.lident); } /*}}}*/ + /* FIDENT */ /*{{{*/ case FIDENT: { - ident=n[*i]; + if (fident == -2) fident = n[*i]->u.fident; ++(*i); - if (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==OP) - /* parse arguments and closing paren of function call, return its value */ /*{{{*/ + if (n[*i] == NULLTOKEN || n[*i]->type != OPERATOR + || n[*i]->u.op != OP) + argc = -1; + else /* parse arguments and closing paren of function call */ /*{{{*/ { ++(*i); - argc=0; - if (!(n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==CP)) + argc = 0; + if (!(n[*i] != NULLTOKEN && n[*i]->type==OPERATOR && n[*i]->u.op==CP)) /* parse at least one argument */ /*{{{*/ { - if (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==COMMA) + if (n[*i] != NULLTOKEN && n[*i]->type==OPERATOR && n[*i]->u.op==COMMA) /* empty argument */ /*{{{*/ { - argv[argc].type=EMPTY; + argv[argc].type = EMPTY; } /*}}}*/ - else argv[argc]=term(n,i); - if (argv[argc].type==EEK) return argv[argc]; + else argv[argc] = term(n, i, meth); + if (argv[argc].type == EEK) return argv[argc]; ++argc; - while (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==COMMA) + while (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR + && n[*i]->u.op == COMMA) /* parse the following argument */ /*{{{*/ { ++(*i); - if (argc<=MAXARGC) + if (argc < MAXARGC) { - if (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && (n[*i]->u.op==COMMA || n[*i]->u.op==CP)) + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && + (n[*i]->u.op == COMMA || n[*i]->u.op == CP)) { argv[argc].type=EMPTY; - } - else argv[argc]=term(n,i); - } - else - { - result.type=EEK; - result.u.err=strcpy(malloc(strlen(_("too many arguments"))+1),_("too many arguments")); - for (j=0; j<=argc; ++j) tfree(&argv[j]); + } else { + argv[argc] = term(n, i, meth); + if (argv[argc].type == EEK) { + for (size_t pa = 0; pa < argc; +pa) tfree(argv + pa); + return argv[argc]; + } + } + } else { + duperror(&result, _("too many arguments")); + for (size_t j=0; j < argc; ++j) tfree(&argv[j]); return result; } ++argc; @@ -151,176 +177,335 @@ static Token primary(Token *n[], int *i) /*}}}*/ } /*}}}*/ - if (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==CP) - /* eval function */ /*{{{*/ - { - ++(*i); - result = tfuncall(ident,argc,argv); - /* To allow a function to return one of its arguments, we need - to be sure not to free that argument: */ - for (j=0; jtype != OPERATOR || n[*i]->u.op != CP) /* ) expected */ /*{{{*/ { - for (j=0; j 0) + { + result.u.funcall.argv = malloc(argc*sizeof(Token)); + for (size_t ai; ai < argc; ++ai) + result.u.funcall.argv[ai] = argv[ai]; + } + return result; } /*}}}*/ + + /* FUNCALL */ /*{{{*/ + case FUNCALL: + if (meth == FULL) + result = tfuncall(n[*i]->u.funcall.fident, n[*i]->u.funcall.argc, + n[*i]->u.funcall.argv); + else result = tcopy(*n[*i]); + ++(*i); + return result; + /*}}}*/ + default: ; /* fall through */ } - result.type=EEK; - result.u.err=strdup(_("value expected")); + duperror(&result, _("value expected")); return result; } /*}}}*/ + /* powterm -- parse and evaluate a x^y term */ /*{{{*/ -static Token powterm(Token *n[], int *i) +static Token powterm(Token *n[], int *i, EvalMethod meth) { Token l; + size_t npows = 0; - l=primary(n,i); - if (l.type==EEK) return l; - while (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op==POW) + l = primary(n, i, meth); + if (l.type == EEK) return l; + while (n[*i] != (Token*)0 && n[*i]->type == OPERATOR && n[*i]->u.op == POW) { - Token result,r; + Token r; ++(*i); - r=primary(n,i); - result=tpow(l,r); - tfree(&l); - tfree(&r); - if (result.type==EEK) return result; - l=result; + r = primary(n,i,meth); + if (meth == FULL) + { + Token result = tpow(l,r); + tfree(&l); + tfree(&r); + if (result.type == EEK) return result; + l = result; + } else { + if (r.type == EEK) + { + tfree(&l); + return r; + } + if (npows == 0) + { + Token tmp = l; + l.type = FUNCALL; + l.u.funcall.fident = identcode("^", 1); + l.u.funcall.argc = 1; + l.u.funcall.argv = malloc(MAXARGC * sizeof(Token)); + l.u.funcall.argv[0] = tmp; + } + if (npows + 1 >= MAXARGC) + { + tfree(&l); + tfree(&r); + duperror(&l, _("Exceeded maximum sequence length of ^")); + return l; + } + ++npows; ++(l.u.funcall.argc); + l.u.funcall.argv[npows] = r; + } } return l; } /*}}}*/ + /* piterm -- parse and evaluate a product/division/modulo term */ /*{{{*/ -static Token piterm(Token *n[], int *i) + static Token piterm(Token *n[], int *i, EvalMethod meth) { + int mulident = identcode("*", 1); Token l; + Operator op = CP; + bool first_funcall = true; - l=powterm(n,i); - if (l.type==EEK) return l; - while (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && (n[*i]->u.op==DIV || n[*i]->u.op==MUL || n[*i]->u.op==MOD)) + l = powterm(n, i, meth); + if (l.type == EEK) return l; + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op; + + while (op == DIV || op == MUL || op == MOD) { - Operator op; - Token result,r; + Token r; - op=n[*i]->u.op; ++(*i); - r=powterm(n,i); - switch (op) + r = powterm(n, i, meth); + if (meth == FULL) { - case MUL: result=tmul(l,r); break; - case DIV: result=tdiv(l,r); break; - case MOD: result=tmod(l,r); break; + Token result; + switch (op) + { + case MUL: result = tmul(l,r); break; + case DIV: result = tdiv(l,r); break; + case MOD: result = tmod(l,r); break; default: assert(0); + } + tfree(&l); + tfree(&r); + if (result.type == EEK) return result; + l = result; + } else { + if (r.type == EEK) + { + tfree(&l); + return r; + } + if (first_funcall || l.u.funcall.fident != mulident || op != MUL) + { + first_funcall = false; + Token tmp = l; + l.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op])); + l.u.funcall.argc = 2; + if (op == MUL) l.u.funcall.argv = malloc(MAXARGC * sizeof(Token)); + else l.u.funcall.argv = malloc(2*sizeof(Token)); + l.u.funcall.argv[0] = tmp; + l.u.funcall.argv[1] = r; + } else { + if (l.u.funcall.argc >= MAXARGC) + { + tfree(&r); + tfree(&l); + duperror(&l, _("Exceeded maximum sequence length of *")); + return l; + } + l.u.funcall.argv[(l.u.funcall.argc)++] = r; + } } - tfree(&l); - tfree(&r); - if (result.type==EEK) return result; - l=result; + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op; + else op = CP; } return l; } /*}}}*/ + /* factor -- parse and evaluate a factor of sums/differences */ /*{{{*/ -static Token factor(Token *n[], int *i) +static Token factor(Token *n[], int *i, EvalMethod meth) { + int plusident = identcode("+", 1); Token l; + Operator op = CP; + bool first_funcall = true; - l=piterm(n,i); - if (l.type==EEK) return l; - while (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && (n[*i]->u.op==PLUS || n[*i]->u.op==MINUS)) + l = piterm(n, i, meth); + if (l.type == EEK) return l; + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op; + + while (op == PLUS || op == MINUS) { - Operator op; - Token result,r; + Token r; - op=n[*i]->u.op; ++(*i); - r=piterm(n,i); - result=(op==PLUS ? tadd(l,r) : tsub(l,r)); - tfree(&l); - tfree(&r); - if (result.type==EEK) return result; - l=result; + r = piterm(n, i, meth); + if (meth == FULL) + { + Token result = (op==PLUS ? tadd(l,r) : tsub(l,r)); + tfree(&l); + tfree(&r); + if (result.type == EEK) return result; + l = result; + } else { + if (r.type == EEK) + { + tfree(&l); + return r; + } + if (first_funcall || l.u.funcall.fident != plusident || op != PLUS) + { + first_funcall = false; + Token tmp = l; + l.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op])); + l.u.funcall.argc = 2; + if (op == PLUS) l.u.funcall.argv = malloc(MAXARGC * sizeof(Token)); + else l.u.funcall.argv = malloc(2*sizeof(Token)); + l.u.funcall.argv[0] = tmp; + l.u.funcall.argv[1] = r; + } else { + if (l.u.funcall.argc >= MAXARGC) + { + tfree(&r); + tfree(&l); + duperror(&l, _("Exceeded maximum sequence length of +")); + return l; + } + l.u.funcall.argv[(l.u.funcall.argc)++] = r; + } + } + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op; + else op = CP; } return l; } /*}}}*/ + /* term -- parse and evaluate a relational term */ /*{{{*/ -static Token term(Token *n[], int *i) +static Token term(Token *n[], int *i, EvalMethod meth) { - Token l; - - l=factor(n,i); - if (l.type==EEK) return l; - while (n[*i]!=(Token*)0 && n[*i]->type==OPERATOR && n[*i]->u.op>=LT && n[*i]->u.op<=NE) + Token l = factor(n, i, meth); + if (l.type == EEK) return l; + /* a < b < c used to mean (a < b) < c, but that does not make sense really + because there is not an ordering on bools (if we had a separate bool + type). So restrict to a single binary relation; one can still use parens + to get the old, odd behavior */ + if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR + && n[*i]->u.op >= LT && n[*i]->u.op <= NE) { - Operator op; - Token result,r; + Operator op = n[*i]->u.op; + Token result, r; - op=n[*i]->u.op; ++(*i); - r=factor(n,i); - switch (op) + r = factor(n, i, meth); + if (meth == FULL) { - case LT: result=tlt(l,r); break; - case LE: result=tle(l,r); break; - case GE: result=tge(l,r); break; - case GT: result=tgt(l,r); break; - case ISEQUAL: result=teq(l,r); break; - case ABOUTEQ: result=tabouteq(l,r); break; - case NE: result=tne(l,r); break; - default: assert(0); + switch (op) + { + case LT: result=tlt(l,r); break; + case LE: result=tle(l,r); break; + case GE: result=tge(l,r); break; + case GT: result=tgt(l,r); break; + case ISEQUAL: result=teq(l,r); break; + case ABOUTEQ: result=tabouteq(l,r); break; + case NE: result=tne(l,r); break; + default: assert(0); + } + tfree(&l); + tfree(&r); + if (result.type == EEK) return result; + l = result; + } else { + if (r.type == EEK) + { + tfree(&l); + return r; + } + Token tmp = l; + l.type = FUNCALL; + l.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op])); + l.u.funcall.argc = 2; + l.u.funcall.argv = malloc(2*sizeof(Token)); + l.u.funcall.argv[0] = tmp; + l.u.funcall.argv[1] = r; } - tfree(&l); - tfree(&r); - if (result.type==EEK) return result; - l=result; } return l; } /*}}}*/ /* eval -- parse and evaluate token sequence */ /*{{{*/ -Token eval(Token **n) +Token eval(Token **n, EvalMethod meth) { - Token result; - int i; + Token l; + int i = 0; + bool first_funcall = true; - assert(upd_sheet!=(Sheet*)0); - i=0; - result=term(n,&i); - if (result.type==EEK) return result; - if (n[i]!=(Token*)0) + assert(upd_sheet != (Sheet*)0); + l = term(n, &i, meth); + if (l.type == EEK) return l; + + while (n[i] != NULLTOKEN) { - tfree(&result); - result.type=EEK; - result.u.err=strcpy(malloc(strlen(_("parse error after term"))+1),_("parse error after term")); - return result; + Token r = term(n, &i, meth); + + if (meth == FULL) + { + Token result = tconcat(l,r); + tfree(&l); + tfree(&r); + if (result.type == EEK) return result; + l = result; + } else { + if (r.type == EEK) + { + tfree(&l); + return r; + } + if (first_funcall) + { + first_funcall = false; + Token tmp = l; + l.u.funcall.fident = identcode("concat", 6); + l.u.funcall.argc = 1; + l.u.funcall.argv = malloc(MAXARGC*sizeof(Token)); + l.u.funcall.argv[0] = l; + } + if (l.u.funcall.argc >= MAXARGC) + { + tfree(&l); + tfree(&r); + duperror(&l, _("Exceeded max sequence lentgh of concatenated terms")); + return l; + } + l.u.funcall.argv[(l.u.funcall.argc)++] = r; + } } - return result; + return l; } /*}}}*/ /* eval_safe -- like eval, but handles null pointer to token sequence */ /*{{{*/ -Token eval_safe(Token **n) +Token eval_safe(Token **n, EvalMethod meth) { Token result; if (n == EMPTY_TVEC) @@ -328,5 +513,5 @@ Token eval_safe(Token **n) result.type = EMPTY; return result; } - return eval(n); + return eval(n, meth); } diff --git a/src/common/parser.h b/src/common/parser.h index 5e25f31..101c673 100644 --- a/src/common/parser.h +++ b/src/common/parser.h @@ -3,7 +3,9 @@ #include "scanner.h" -Token eval_safe(Token **n); /* OK to call on null ptr */ -Token eval(Token *n[]); /* Don't call with null ptr */ +typedef enum {FULL, LITERAL} EvalMethod; +Token eval_safe(Token **n, EvalMethod meth); /* OK to call on null ptr */ +Token eval(Token *n[], EvalMethod meth); /* Don't call with null ptr */ +Token evaltoken(Token n, EvalMethod meth); /* Caller "owns" the result */ #endif diff --git a/src/common/scanner.c b/src/common/scanner.c index a50c548..5953ecf 100644 --- a/src/common/scanner.c +++ b/src/common/scanner.c @@ -16,6 +16,7 @@ #include #include #include +extern char *strdup(const char* s); extern double strtod(const char *nptr, char **endptr); /* SunOS 4 hack */ #include @@ -34,6 +35,14 @@ const char *Type_Name[] = [LOCATION] = "LOCATION", [FUNCALL] = "FUNCTION-CALL", [EEK] = "ERROR" }; +const char *Op_Name[] = + { [PLUS] = "+", [MINUS] = "-", [MUL] = "*", [DIV] = "/", + [OP] = "(", [CP] = ")", [COMMA] = ",", + [LT] = "<", [LE] = "<=", [GE] = ">=", [GT] = ">", + [ISEQUAL] = "==", [ABOUTEQ] = "~=", [NE] = "!=", + [POW] = "^", [MOD] = "%" + }; + /* identcode -- return number of identifier */ /*{{{*/ int identcode(const char *s, size_t len) { @@ -58,6 +67,12 @@ bool loc_in_box(const Location test, return true; } +/* duperror - Sets tok to an error and strdups the message into place */ +void duperror(Token* tok, const char* erro) { + tok->type = EEK; + tok->u.err = strdup(erro); +} + /* charstring -- match quoted string and return token */ /*{{{*/ static Token *charstring(const char **s) { @@ -259,7 +274,7 @@ static int print_fident(char* dest, size_t space, int id) /* printtok -- print a single token, passed by address, although not changed */ /*{{{*/ size_t printtok(char* dest, size_t size, size_t field_width, int quote_strings, int use_scientific, - int precision, int verbose_error, Token *tok) + int precision, int verbose_error, const Token *tok) { size_t cur; @@ -270,7 +285,7 @@ size_t printtok(char* dest, size_t size, size_t field_width, if (tok != NULLTOKEN) switch (tok->type) { /* EMPTY */ /*{{{*/ - case EMPTY: if (size > 0) dest[cur++] = '\0'; break; + case EMPTY: if (size > 0) dest[cur] = '\0'; break; /*}}}*/ /* STRING */ /*{{{*/ case STRING: diff --git a/src/common/scanner.h b/src/common/scanner.h index 627ee77..fbf7553 100644 --- a/src/common/scanner.h +++ b/src/common/scanner.h @@ -18,7 +18,17 @@ typedef enum { #define MAX_TYPE_NAME_LENGTH 16 extern const char *Type_Name[]; -typedef enum { PLUS, MINUS, MUL, DIV, OP, CP, COMMA, LT, LE, GE, GT, ISEQUAL, ABOUTEQ, NE, POW, MOD } Operator; +typedef enum + { + PLUS, MINUS, MUL, DIV, OP, CP, COMMA, + LT, /* MUST be the first relational operation for parsing to work */ + LE, GE, GT, ISEQUAL, ABOUTEQ, + NE, /* MUST be the last relational operation for parsing to work */ + POW, MOD + } Operator; + +#define MAX_OP_NAME_LENGTH 3 +extern const char *Op_Name[]; typedef int Location[3]; /* NOTE: Locations are passed by REFERENCE not value */ /* I.e., to accapt a Location argument, declare the parameter to be of type @@ -67,10 +77,11 @@ typedef struct Token_struc #define EMPTY_TVEC ((Token**)0) int identcode(const char *s, size_t len); +void duperror(Token* tok, const char* erro); Token **scan(const char **s); -size_t printtok(char* dest, size_t size, size_t field_width, +size_t printtok(char *dest, size_t size, size_t field_width, int quote_strings, int use_scientific, - int precision, int verbose_error, Token *tok); + int precision, int verbose_error, const Token *tok); void print(char *s, size_t size, size_t chars, int quote, int scientific, int precision, Token **n); #ifdef __cplusplus diff --git a/src/common/sheet.c b/src/common/sheet.c index 32aff69..4d45e43 100644 --- a/src/common/sheet.c +++ b/src/common/sheet.c @@ -585,7 +585,7 @@ Token getvalue(Sheet *sheet, const Location at) cell->updated = 1; oldvalue = cell->value; upd_clock = 0; - cell->value = eval_safe(getcont(cell, 2)); + cell->value = eval_safe(getcont(cell, CONTINGENT), FULL); tfree(&oldvalue); } else if (upd_clock) @@ -593,7 +593,7 @@ Token getvalue(Sheet *sheet, const Location at) cell->updated = 1; upd_clock = 0; oldvalue = cell->resvalue; - cell->resvalue = eval_safe(getcont(cell,2)); + cell->resvalue = eval_safe(getcont(cell, CONTINGENT), FULL); tfree(&oldvalue); } upd_sheet = old_sheet;