/* teapot-spreadsheet/src/common/parser.c
 *
 * (repository listing: 724 lines, 18 KiB, C)
 */

/* #includes */ /*{{{C}}}*//*{{{*/
#ifndef NO_POSIX_SOURCE
#undef _POSIX_SOURCE
#define _POSIX_SOURCE 1
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 2
#endif
#ifdef DMALLOC
#include "dmalloc.h"
#endif
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
extern char *strdup(const char* s);
#include <string.h>
#include "eval.h"
#include "func.h"
#include "main.h"
#include "misc.h"
#include "parser.h"
#include "scanner.h"
#include "sheet.h"
/*}}}*/
/* #defines */ /*{{{*/
#define MAXARGC 16
/*}}}*/
/* prototypes */ /*{{{*/
static Token boolterm(Operator bop, Token *n[], int *i, EvalMethod meth);
/*}}}*/
/* term -- parse and evaluate one complete term starting at n[*i].
   This simply enters the precedence chain at the LOR level of boolterm,
   which in turn descends to the more tightly binding levels. */
static Token term(Token *n[], int *i, EvalMethod meth)
{
  Token whole = boolterm(LOR, n, i, meth);

  return whole;
}
/* full_eval_funcall -- fully evaluate a FUNCALL token.
   Unless the called function is marked MACRO (in which case it receives
   the stored argument tokens untouched), every argument is first evaluated
   with evaltoken(..., FULL); the function is then applied via tfuncall.
   The value produced by tfuncall is returned.
*/
static Token full_eval_funcall(Token *t)
{
  Token *args;
  Token value;
  size_t k;

  assert(t->type == FUNCALL);

  /* nothing to evaluate when there are no arguments: call straight away */
  if (t->u.funcall.argc < 1)
  {
    return tfuncall(t->u.funcall.fident, t->u.funcall.argc, 0);
  }

  if (tfunc[t->u.funcall.fident].eval_as == MACRO)
  {
    args = t->u.funcall.argv;
  }
  else
  {
    args = malloc(t->u.funcall.argc*sizeof(Token));
    k = 0;
    while (k < t->u.funcall.argc)
    {
      args[k] = evaltoken(t->u.funcall.argv[k], FULL);
      ++k;
    }
  }

  value = tfuncall(t->u.funcall.fident, t->u.funcall.argc, args);

  if (tfunc[t->u.funcall.fident].eval_as == FUNCT)
  {
    /* The function may have handed back one of its own arguments, so
       release each evaluated argument with the result protected. */
    for (k = 0; k < t->u.funcall.argc; ++k)
    {
      tfree_protected(args + k, value);
    }
    free(args);
  }
  return value;
}
/* primary -- parse and evaluate a primary term */ /*{{{*/
/* A primary is one of:
     - a STRING, FLOAT or INT constant, returned as a copy;
     - a label identifier, looked up with findlabel when meth is FULL and
       copied otherwise;
     - a parenthesized term;
     - a function call, introduced either by a function identifier or by
       an operator symbol immediately followed by "(", e.g. +(1,2);
     - an already assembled FUNCALL token.
   n is the token vector (its end is marked by NULLTOKEN) and *i the index
   of the next unread token; *i is advanced past the tokens consumed.
   Failures are reported by returning an EEK token.
*/
static Token primary(Token *n[], int *i, EvalMethod meth)
{
  /* variables */ /*{{{*/
  int argc;
  int fident = -2; /* -2: not set from an operator symbol (see FIDENT) */
  Token *ident,argv[MAXARGC],result;
  /*}}}*/

  if (n[*i] == NULLTOKEN)
  /* error */ /*{{{*/
  {
    duperror(&result, _("missing operator"));
    return result;
  }
  /*}}}*/
  switch (n[*i]->type)
  {
    /* STRING, FLOAT, INT */ /*{{{*/
    case STRING:
    case FLOAT:
    case INT:
      return tcopy(*n[(*i)++]);
    /*}}}*/
    /* LIDENT */ /*{{{*/
    case LIDENT:
    {
      ident = n[*i];
      ++(*i);
      if (meth == FULL) return findlabel(upd_sheet,ident->u.lident);
      return tcopy(*ident);
    }
    /*}}}*/
    /* OPERATOR */ /*{{{*/
    case OPERATOR:
    {
      switch (n[*i]->u.op)
      {
        case OP: /* return paren term */ /*{{{*/
          ++(*i);
          result = term(n, i, meth);
          if (result.type == EEK) return result;
          if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op==CP)
          {
            ++(*i);
            return result;
          }
          tfree(&result);
          duperror(&result, _(") expected"));
          return result;
        /*}}}*/
        /* Unary minus will be handled in powterm */
        case CP:
          duperror(&result, _("Extra umatched ')'"));
          return result;
        case COMMA:
          duperror(&result, _("Occurrence of ',' outside parameter list"));
          return result;
        default:
          /* Can also use any infix symbol as a function, but only with
             parens, not bare */
          if (n[(*i)+1] == NULLTOKEN || n[(*i)+1]->type != OPERATOR
              || n[(*i)+1]->u.op != OP)
          {
            const char *templ = "To use %s as function symbol, must use %s(...)";
            result.type = EEK;
            result.u.err = malloc(strlen(templ) + 2 * MAX_OP_NAME_LENGTH + 1);
            sprintf(result.u.err, templ,
                    Op_Name[n[*i]->u.op], Op_Name[n[*i]->u.op]);
            return result;
          }
          fident = identcode(Op_Name[n[*i]->u.op], strlen(Op_Name[n[*i]->u.op]));
          /* FALL THROUGH TO PROCESS OPERATOR AS FUNCTION CALL */
      }
    }
    /* FALLTHROUGH */
    /*}}}*/
    /* FIDENT */ /*{{{*/
    case FIDENT:
    {
      /* when we arrive here from an operator symbol, fident is already set */
      if (fident == -2) fident = n[*i]->u.fident;
      ++(*i);
      if (n[*i] == NULLTOKEN || n[*i]->type != OPERATOR
          || n[*i]->u.op != OP)
        argc = -1; /* bare identifier: no argument list at all */
      else /* parse arguments and closing paren of function call */ /*{{{*/
      {
        ++(*i);
        argc = 0;
        if (!(n[*i] != NULLTOKEN && n[*i]->type==OPERATOR && n[*i]->u.op==CP))
        /* parse at least one argument */ /*{{{*/
        {
          if (n[*i] != NULLTOKEN && n[*i]->type==OPERATOR && n[*i]->u.op==COMMA)
          /* empty argument */ /*{{{*/
          {
            argv[argc].type = EMPTY;
          }
          /*}}}*/
          else argv[argc] = term(n, i, meth);
          if (argv[argc].type == EEK) return argv[argc];
          ++argc;
          while (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR
                 && n[*i]->u.op == COMMA)
          /* parse the following argument */ /*{{{*/
          {
            ++(*i);
            if (argc < MAXARGC)
            {
              if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR &&
                  (n[*i]->u.op == COMMA || n[*i]->u.op == CP))
              {
                argv[argc].type=EMPTY;
              } else {
                argv[argc] = term(n, i, meth);
                if (argv[argc].type == EEK) {
                  /* release the arguments parsed so far; the loop index
                     must advance (it used to be "+pa", which never
                     terminated) */
                  for (int pa = 0; pa < argc; ++pa) tfree(argv + pa);
                  return argv[argc];
                }
              }
            } else {
              duperror(&result, _("too many arguments"));
              for (int j = 0; j < argc; ++j) tfree(&argv[j]);
              return result;
            }
            ++argc;
          }
          /*}}}*/
        }
        /*}}}*/
        if (n[*i] == NULLTOKEN || n[*i]->type != OPERATOR || n[*i]->u.op != CP)
        /* ) expected */ /*{{{*/
        {
          for (int j = 0; j < argc; ++j) tfree(&argv[j]);
          duperror(&result, _(") expected"));
          return result;
        }
        /*}}}*/
        ++(*i);
      }
      /*}}}*/
      if (meth == FULL)
      {
        result = tfuncall(fident, argc, argv);
        /* To allow a function to return one of its arguments, we need
           to be sure not to free that argument.  The index is a signed
           int on purpose: argc is -1 for a bare identifier, and comparing
           that against an unsigned index would make the loop run far
           beyond the end of argv. */
        for (int j = 0; j < argc; ++j) tfree_protected(&argv[j], result);
        return result;
      }
      /* LITERAL: package identifier and arguments as a FUNCALL token */
      result.type = FUNCALL;
      result.u.funcall.fident = fident;
      result.u.funcall.argc = argc;
      if (argc > 0)
      {
        result.u.funcall.argv = malloc(argc*sizeof(Token));
        for (int ai = 0; ai < argc; ++ai)
          result.u.funcall.argv[ai] = argv[ai];
      } else result.u.funcall.argv = NULLTOKEN;
      return result;
    }
    /*}}}*/
    /* FUNCALL */ /*{{{*/
    case FUNCALL:
      if (meth == FULL) result = full_eval_funcall(n[*i]);
      else result = tcopy(*n[*i]);
      ++(*i);
      return result;
    /*}}}*/
    default: ; /* fall through */
  }
  duperror(&result, _("value expected"));
  return result;
}
/*}}}*/
/* powterm -- parse and evaluate a x^y term */ /*{{{*/
/* Handles two things at this precedence level:
     1. a leading MINUS operator, which is either unary negation of the
        powterm to its right or the start of a call using "-" as a
        function symbol (left to primary);
     2. a primary followed by any number of "^ primary" pairs.
   In FULL mode the powers are folded from left to right with tpow.  In
   LITERAL mode the operands are collected as the arguments of a single
   FUNC_CARET FUNCALL token, holding at most MAXARGC operands.
   *i is advanced past the tokens consumed; errors come back as EEK tokens.
*/
static Token powterm(Token *n[], int *i, EvalMethod meth)
{
Token l;
/* number of ^ operators collected so far (used in LITERAL mode only) */
size_t npows = 0;
if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op == MINUS)
{
/* A - symbol here is a pain. If it is being used as a function symbol, it
is higher precedence than exponentiation. If it is unary negation,
then it's lower precedence and we have to grab a powterm to the right,
and negate it. As far as I can tell the only way to tell is to
look ahead a term to see if there's a comma...
*/
bool unaryneg = true;
/* j is a private cursor for the lookahead; *i is left untouched by it */
int j = *i + 1;
if (n[j] == NULLTOKEN)
return duperror(&l, _("A bare - is not a valid expression"));
if (n[j]->type == OPERATOR && n[j]->u.op == OP)
{
++j;
/* NOTE(review): the lookahead parses the term with the caller's meth and
   discards the result, so in FULL mode that term is evaluated here and
   then again when it is parsed for real below -- confirm that this double
   evaluation is acceptable. */
Token dummy = term(n, &j, meth);
/* "-(x, ..." can only be the function form of the minus symbol */
if (n[j] != NULLTOKEN && n[j]->type == OPERATOR && n[j]->u.op == COMMA)
unaryneg = false;
tfree(&dummy);
}
if (unaryneg)
{
++(*i);
l = powterm(n, i, meth);
if (meth == FULL) return tneg(l);
if (l.type == EEK) return l;
/* a literal numeric constant can be negated on the spot */
if (TOKISNUM(l)) return tneg(l);
/* anything else is wrapped as a one-argument FUNC_NEGATE call */
Token result;
result.type = FUNCALL;
result.u.funcall.fident = FUNC_NEGATE;
result.u.funcall.argc = 1;
result.u.funcall.argv = malloc(sizeof(Token));
result.u.funcall.argv[0] = l;
return result;
}
}
l = primary(n, i, meth);
if (l.type == EEK) return l;
while (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op == POW)
{
Token r;
++(*i);
r = primary(n,i,meth);
if (meth == FULL)
{
Token result = tpow(l,r);
/* release both operands, but never storage that the result itself uses */
tfree_protected(&l, result);
tfree_protected(&r, result);
if (result.type == EEK) return result;
l = result;
} else {
if (r.type == EEK)
{
tfree(&l);
return r;
}
if (npows == 0)
{
/* first ^ seen: turn l into a FUNC_CARET call whose first argument is
   the base parsed so far */
Token tmp = l;
l.type = FUNCALL;
l.u.funcall.fident = FUNC_CARET;
l.u.funcall.argc = 1;
l.u.funcall.argv = malloc(MAXARGC * sizeof(Token));
l.u.funcall.argv[0] = tmp;
}
/* argv has room for MAXARGC tokens; slot npows+1 would be out of range */
if (npows + 1 >= MAXARGC)
{
tfree(&l);
tfree(&r);
duperror(&l, _("Exceeded maximum sequence length of ^"));
return l;
}
++npows; ++(l.u.funcall.argc);
l.u.funcall.argv[npows] = r;
}
}
return l;
}
/*}}}*/
/* piterm -- parse and evaluate a product/division/modulo term */ /*{{{*/
/* Reads a powterm followed by any number of "* powterm", "/ powterm" or
   "% powterm" pairs, advancing *i as it goes.  In FULL mode the operands
   are combined from left to right with tmul/tdiv/tmod.  In LITERAL mode a
   FUNCALL token is built; consecutive multiplications are gathered into a
   single call of up to MAXARGC arguments, while every other operator
   starts a fresh two-argument call around what has been built so far.
*/
static Token piterm(Token *n[], int *i, EvalMethod meth)
{
  FunctionIdentifier times = FUNC_ASTERISK;
  bool wrapped = false; /* has acc been turned into a funcall by us yet? */
  Token acc = powterm(n, i, meth);

  if (acc.type == EEK) return acc;
  for (;;)
  {
    Operator op = CP;
    Token rhs;

    /* peek at the next token; stop unless it is one of our operators */
    if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
    if (op != DIV && op != MUL && op != MOD) break;

    ++(*i);
    rhs = powterm(n, i, meth);

    if (meth == FULL)
    {
      Token value;

      if (op == MUL) value = tmul(acc,rhs);
      else if (op == DIV) value = tdiv(acc,rhs);
      else value = tmod(acc,rhs);
      /* release the operands, sparing anything the result still uses */
      tfree_protected(&acc, value);
      tfree_protected(&rhs, value);
      if (value.type == EEK) return value;
      acc = value;
      continue;
    }

    /* LITERAL from here on */
    if (rhs.type == EEK)
    {
      tfree(&acc);
      return rhs;
    }
    if (wrapped && acc.u.funcall.fident == times && op == MUL)
    {
      /* extend the running multiplication, space permitting */
      if (acc.u.funcall.argc >= MAXARGC)
      {
        tfree(&rhs);
        tfree(&acc);
        duperror(&acc, _("Exceeded maximum sequence length of *"));
        return acc;
      }
      acc.u.funcall.argv[acc.u.funcall.argc] = rhs;
      ++(acc.u.funcall.argc);
    }
    else
    {
      /* start a new call with the previous result as its first argument;
         only a multiplication can grow beyond two arguments later on */
      Token inner = acc;

      wrapped = true;
      acc.type = FUNCALL;
      acc.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op]));
      acc.u.funcall.argc = 2;
      acc.u.funcall.argv = malloc(((op == MUL) ? MAXARGC : 2) * sizeof(Token));
      acc.u.funcall.argv[0] = inner;
      acc.u.funcall.argv[1] = rhs;
    }
  }
  return acc;
}
/*}}}*/
/* factor -- parse and evaluate a factor of sums/differences */ /*{{{*/
/* Reads a piterm followed by any number of "+ piterm" or "- piterm" pairs,
   advancing *i as it goes.  In FULL mode the operands are combined from
   left to right with tadd/tsub.  In LITERAL mode a FUNCALL token is built;
   consecutive additions are gathered into a single call of up to MAXARGC
   arguments, while a subtraction always starts a fresh two-argument call.
*/
static Token factor(Token *n[], int *i, EvalMethod meth)
{
  Token acc = piterm(n, i, meth);

  if (acc.type == EEK) return acc;

  FunctionIdentifier plus = FUNC_PLUS_SYMBOL;
  bool wrapped = false; /* has acc been turned into a funcall by us yet? */

  for (;;)
  {
    Operator op = CP;

    /* peek at the next token; stop unless it is + or - */
    if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
    if (op != PLUS && op != MINUS) break;

    ++(*i);
    Token rhs = piterm(n, i, meth);

    if (meth == FULL)
    {
      Token value;

      if (op == PLUS) value = tadd(acc,rhs);
      else value = tsub(acc,rhs);
      /* release the operands, sparing anything the result still uses */
      tfree_protected(&acc, value);
      tfree_protected(&rhs, value);
      if (value.type == EEK) return value;
      acc = value;
      continue;
    }

    /* LITERAL from here on */
    if (rhs.type == EEK)
    {
      tfree(&acc);
      return rhs;
    }
    if (wrapped && acc.u.funcall.fident == plus && op == PLUS)
    {
      /* extend the running sum, space permitting */
      if (acc.u.funcall.argc >= MAXARGC)
      {
        tfree(&rhs);
        tfree(&acc);
        return duperror(&acc, _("Exceeded maximum sequence length of +"));
      }
      acc.u.funcall.argv[acc.u.funcall.argc] = rhs;
      ++(acc.u.funcall.argc);
    }
    else
    {
      /* start a new call with the previous result as its first argument;
         only an addition can grow beyond two arguments later on */
      Token inner = acc;

      wrapped = true;
      acc.type = FUNCALL;
      acc.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op]));
      acc.u.funcall.argc = 2;
      acc.u.funcall.argv = malloc(((op == PLUS) ? MAXARGC : 2) * sizeof(Token));
      acc.u.funcall.argv[0] = inner;
      acc.u.funcall.argv[1] = rhs;
    }
  }
  return acc;
}
/*}}}*/
/* relterm -- parse and evaluate a relational term */ /*{{{*/
/* Reads a factor followed by any number of "relop factor" pairs, advancing
   *i as it goes.  Comparisons chain: a < b < c means (a < b) and (b < c).
   The right operand of every comparison is parsed LITERALly so that, in
   FULL mode, it is only evaluated when the chain has not already turned
   out false.
   FULL:    returns the value of the chain (the plain factor if there is no
            comparison at all), or an EEK token.
   LITERAL: returns a FUNCALL for a single comparison, or a FUNC_AND
            FUNCALL of up to MAXARGC comparisons for a chain.
*/
static Token relterm(Token *n[], int *i, EvalMethod meth)
{
  Token l = factor(n, i, meth);
  if (l.type == EEK) return l;
  bool firstcomp = true;
  Token result = l;
  Operator op = CP;

  /* a < b < c now means a < b and b < c, so we have to save both the running
     result and the last term
  */
  if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
  while (IS_RELATION_OP(op))
  {
    ++(*i);
    Token r = factor(n, i, LITERAL);
    if (meth == FULL)
    {
      if (!firstcomp && result.type == BOOL && !result.u.bl) {
        /* chain is already false: skip evaluating this operand */
        tfree(&r);
      } else {
        Token tmp = evaltoken(r, FULL);
        tfree_protected(&r, tmp);
        r = tmp;
        switch (op)
        {
          case LT: tmp = tlt(l,r); break;
          case LE: tmp = tle(l,r); break;
          case GE: tmp = tge(l,r); break;
          case GT: tmp = tgt(l,r); break;
          case ISEQUAL: tmp = teq(l,r); break;
          case ABOUTEQ: tmp = tabouteq(l,r); break;
          case NE: tmp = tne(l,r); break;
          default: assert(0);
        }
        /* NOTE(review): before the first comparison result is a plain copy
           of l, so this call presumably releases nothing and the original
           left operand may never be freed; likewise the last right operand
           is still held in l when the loop ends -- confirm against
           tfree_protected. */
        tfree_protected(&l, result);
        l = r;
        if (firstcomp) {
          result = tmp;
          /* Without clearing the flag every later comparison would simply
             overwrite the running result, so that a < b < c gave b < c. */
          firstcomp = false;
        } else {
          Token newres = tand(result, tmp);
          tfree_protected(&result, newres);
          tfree_protected(&tmp, newres);
          result = newres;
        }
        if (result.type == EEK) {
          tfree_protected(&l, result);
          return result;
        }
      }
    } else { /* meth = LITERAL */
      if (r.type == EEK)
      {
        tfree(&l);
        return r;
      }
      Token newcomp;
      newcomp.type = FUNCALL;
      newcomp.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op]));
      newcomp.u.funcall.argc = 2;
      newcomp.u.funcall.argv = malloc(2*sizeof(Token));
      newcomp.u.funcall.argv[0] = l;
      newcomp.u.funcall.argv[1] = r;
      /* NOTE(review): from here on l shares its contents with argv[1] of
         this comparison and becomes argv[0] of the next one -- confirm
         that tfree copes with a token stored in two places. */
      l = r;
      if (firstcomp) {
        result = newcomp;
        firstcomp = false;
      } else if (result.u.funcall.fident != FUNC_AND) {
        /* second comparison: wrap the first one and this one in an AND.
           result is a FUNCALL token here, so the identifier has to be read
           from the funcall member of the union. */
        Token holdres = result;
        result.u.funcall.fident = FUNC_AND;
        result.u.funcall.argc = 2;
        result.u.funcall.argv = malloc(MAXARGC*sizeof(Token));
        result.u.funcall.argv[0] = holdres;
        result.u.funcall.argv[1] = newcomp;
      } else if (result.u.funcall.argc >= MAXARGC) {
        tfree(&result);
        return
          duperror(&result,
                   _("Exeeded maximum sequence length of comparisons"));
      } else {
        result.u.funcall.argv[(result.u.funcall.argc)++] = newcomp;
      }
    }
    if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i] -> u.op;
    else op = CP;
  } /* while next op is a comparison */
  return result;
}
/*}}}*/
/* boolterm -- parse and evaluate a boolean term */ /*{{{*/
/* bop selects the level: for LAND the operands are relterms, for LOR they
   are LAND-level boolterms.  Reads one operand followed by any number of
   "bop operand" pairs, advancing *i as it goes.
   FULL:    combines the operands from left to right with tand/tor.  A right
            operand is not evaluated at all once the left side is a BOOL
            that already decides the outcome (false for LAND, true for LOR).
   LITERAL: gathers the operands into one FUNCALL of up to MAXARGC
            arguments.
   Errors are returned as EEK tokens.
*/
static Token boolterm(Operator bop, Token *n[], int *i, EvalMethod meth)
{
  assert (bop == LAND || bop == LOR);
  Token l;
  Operator op = CP;
  bool first_funcall = true;

  l = (bop == LAND) ? relterm(n, i, meth) : boolterm(LAND, n, i, meth);
  if (l.type == EEK) return l;
  if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
  while (op == bop)
  {
    ++(*i);
    /* Need to evaluate right operand literally for the sake of short
       circuiting
    */
    Token r =
      (bop == LAND) ? relterm(n, i, LITERAL) : boolterm(LAND, n, i, LITERAL);
    if (meth == FULL) {
      if (l.type == BOOL
          && ((bop == LAND && !l.u.bl) || (bop == LOR && l.u.bl)))
        tfree(&r); /* outcome already decided by the left operand */
      else {
        Token result = evaltoken(r, FULL);
        tfree_protected(&r, result);
        /* Combine the LEFT operand with the evaluated right one.  The
           literal r has just been released and must not be used again
           (the LOR case used to pass it to tor in place of l). */
        Token tmp = (bop == LAND) ? tand(l, result) : tor(l, result);
        tfree_protected(&result, tmp);
        tfree_protected(&l, tmp);
        if (tmp.type == EEK) return tmp;
        l = tmp;
      }
    } else {
      if (r.type == EEK)
      {
        tfree(&l);
        return r;
      }
      if (first_funcall)
      {
        /* first operator: turn l into a funcall holding both operands */
        first_funcall = false;
        Token tmp = l;
        l.type = FUNCALL;
        l.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op]));
        l.u.funcall.argc = 2;
        l.u.funcall.argv = malloc(MAXARGC * sizeof(Token));
        l.u.funcall.argv[0] = tmp;
        l.u.funcall.argv[1] = r;
      } else {
        if (l.u.funcall.argc >= MAXARGC)
        {
          tfree(&r);
          tfree(&l);
          const char* templ = _("Exceeded max sequence length of %s");
          l.type = EEK;
          l.u.err = malloc(strlen(templ) + MAX_OP_NAME_LENGTH + 1);
          sprintf(l.u.err, templ, Op_Name[op]);
          return l;
        }
        l.u.funcall.argv[(l.u.funcall.argc)++] = r;
      }
    }
    if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
    else op = CP;
  }
  return l;
}
/*}}}*/
/* eval -- parse and evaluate nonempty token sequence
   if the sequence might be empty, use eval_safe. */ /*{{{*/
/* Terms that follow one another without an operator in between are
   concatenated: with tconcat in FULL mode, or by gathering them into one
   FUNC_CONCAT FUNCALL of up to MAXARGC arguments in LITERAL mode.
   Any mode other than LITERAL requires upd_sheet to be set (asserted).
*/
Token eval(Token **n, EvalMethod meth)
{
  int pos = 0;
  bool wrapped = false; /* has acc been turned into a CONCAT funcall yet? */
  Token acc;

  assert(meth == LITERAL || upd_sheet != (Sheet*)0);
  acc = term(n, &pos, meth);
  if (acc.type == EEK) return acc;

  for (; n[pos] != NULLTOKEN; )
  {
    Token next = term(n, &pos, meth);

    if (meth == FULL)
    {
      Token joined = tconcat(acc,next);

      /* release the pieces, sparing anything the result still uses */
      tfree_protected(&acc, joined);
      tfree_protected(&next, joined);
      if (joined.type == EEK) return joined;
      acc = joined;
      continue;
    }

    /* LITERAL from here on */
    if (next.type == EEK)
    {
      tfree(&acc);
      return next;
    }
    if (!wrapped)
    {
      /* second term seen: the first becomes argument 0 of a CONCAT call */
      Token head = acc;

      wrapped = true;
      acc.type = FUNCALL;
      acc.u.funcall.fident = FUNC_CONCAT;
      acc.u.funcall.argc = 1;
      acc.u.funcall.argv = malloc(MAXARGC*sizeof(Token));
      acc.u.funcall.argv[0] = head;
    }
    if (acc.u.funcall.argc >= MAXARGC)
    {
      tfree(&acc);
      tfree(&next);
      duperror(&acc, _("Exceeded max sequence lentgh of concatenated terms"));
      return acc;
    }
    acc.u.funcall.argv[acc.u.funcall.argc] = next;
    ++(acc.u.funcall.argc);
  }
  return acc;
}
/*}}}*/
/* eval_safe -- like eval, but handles null pointer to token sequence */ /*{{{*/
/* An absent (EMPTY_TVEC) sequence, or one whose first entry is NULLTOKEN,
   yields a token of type EMPTY; anything else is passed on to eval. */
Token eval_safe(Token **n, EvalMethod meth)
{
  if (n != EMPTY_TVEC && *n != NULLTOKEN)
  {
    return eval(n, meth);
  }

  Token blank;

  blank.type = EMPTY;
  return blank;
}
/* evaltoken -- like eval, but evaluate a single token
NOTE: caller owns the result and must arrrange that it be
eventually tfree()ed */ /*{{{*/
Token evaltoken(Token n, EvalMethod meth)
{
if (meth == LITERAL) return tcopy(n);
switch (n.type) {
case EMPTY: return n;
case STRING: return tcopy(n);
case FLOAT:
case INT:
return n;
case OPERATOR: {
Token err;
err.type = EEK;
const char *templ = _("Attempt to eval bare operator token: ");
err.u.err = malloc(strlen(templ) + MAX_OP_NAME_LENGTH + 2);
strcpy(err.u.err, templ);
strcat(err.u.err, Op_Name[n.u.op]);
return err;
}
case LIDENT: return findlabel(upd_sheet, n.u.lident);
case FIDENT: {
Token err;
err.type = EEK;
const char *templ = _("Attempt to eval bare function identifier token: ");
err.u.err = malloc(strlen(templ) + MAX_FUNC_NAME_LENGTH + 2);
strcpy(err.u.err, templ);
strcat(err.u.err, tfunc[n.u.fident].name);
return err;
}
case LOCATION: return n;
case FUNCALL: return full_eval_funcall(&n);
case EEK: return tcopy(n);
default: assert(0);
}
return n;
}