teapot-spreadsheet/src/common/parser.c

518 lines
12 KiB
C

/* #includes */ /*{{{C}}}*//*{{{*/
#ifndef NO_POSIX_SOURCE
#undef _POSIX_SOURCE
#define _POSIX_SOURCE 1
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 2
#endif
#ifdef DMALLOC
#include "dmalloc.h"
#endif
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
extern char *strdup(const char* s);
#include <string.h>
#include "eval.h"
#include "main.h"
#include "misc.h"
#include "parser.h"
#include "scanner.h"
#include "sheet.h"
/*}}}*/
/* #defines */ /*{{{*/
#define MAXARGC 16
/*}}}*/
/* prototypes */ /*{{{*/
static Token term(Token *n[], int *i, EvalMethod meth);
/*}}}*/
/* primary -- parse and evaluate a primary term */ /*{{{*/
/*
 * Parse one primary expression starting at n[*i] and advance *i past every
 * token that was consumed.
 *
 *   n    - array of token pointers, terminated by NULLTOKEN
 *   i    - in/out: index of the current token within n
 *   meth - FULL evaluates while parsing; any other method builds a FUNCALL
 *          token describing the call instead of evaluating it
 *
 * Handles literals, label identifiers, parenthesised terms, unary minus,
 * function calls (including an operator symbol used as a function name, e.g.
 * "+(1,2)") and already-built FUNCALL tokens.
 *
 * Returns the resulting token; on a syntax error the result is an error
 * token (type EEK, either built here or via duperror()).
 */
static Token primary(Token *n[], int *i, EvalMethod meth)
{
  /* variables */ /*{{{*/
  int argc;
  int fident = -2; /* -2: not chosen yet; preset when an operator symbol is called as a function */
  Token *ident, argv[MAXARGC], result;
  /*}}}*/

  if (n[*i] == NULLTOKEN)
  /* error */ /*{{{*/
  {
    duperror(&result, _("missing operator"));
    return result;
  }
  /*}}}*/
  switch (n[*i]->type)
  {
    /* STRING, FLOAT, INT */ /*{{{*/
    case STRING:
    case FLOAT:
    case INT:
      return tcopy(*n[(*i)++]);
    /*}}}*/
    /* LIDENT */ /*{{{*/
    case LIDENT:
    {
      ident = n[*i];
      ++(*i);
      /* only a full evaluation resolves the label; otherwise keep the token */
      if (meth == FULL) return findlabel(upd_sheet, ident->u.lident);
      return tcopy(*ident);
    }
    /*}}}*/
    /* OPERATOR */ /*{{{*/
    case OPERATOR:
    {
      switch (n[*i]->u.op)
      {
        case OP: /* return paren term */ /*{{{*/
          ++(*i);
          result = term(n, i, meth);
          if (result.type == EEK) return result;
          if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op == CP)
          {
            ++(*i);
            return result;
          }
          tfree(&result);
          duperror(&result, _(") expected"));
          return result;
        /*}}}*/
        case MINUS: /* return negated term */ /*{{{*/
        {
          ++(*i);
          if (meth == FULL) return tneg(primary(n, i, meth));
          Token arg = primary(n, i, meth);
          if (arg.type == EEK) return arg;
          /* not evaluating: represent -x as the call negate(x) */
          result.type = FUNCALL;
          result.u.funcall.fident = identcode("negate", 6);
          result.u.funcall.argc = 1;
          result.u.funcall.argv = malloc(sizeof(Token));
          result.u.funcall.argv[0] = arg;
          return result;
        }
        /*}}}*/
        case CP:
          duperror(&result, _("Extra umatched ')'"));
          return result;
        case COMMA:
          duperror(&result, _("Occurrence of ',' outside parameter list"));
          return result;
        default:
          /* Can also use any infix symbol as a function, but only with
             parens, not bare */
          if (n[(*i)+1] == NULLTOKEN || n[(*i)+1]->type != OPERATOR
              || n[(*i)+1]->u.op != OP)
          {
            const char *templ = "To use %s as function symbol, must use %s(...)";
            result.type = EEK;
            /* room for the template plus both operator names */
            result.u.err = malloc(strlen(templ) + 2 * MAX_OP_NAME_LENGTH + 1);
            sprintf(result.u.err, templ,
                    Op_Name[n[*i]->u.op], Op_Name[n[*i]->u.op]);
            return result;
          }
          fident = identcode(Op_Name[n[*i]->u.op], strlen(Op_Name[n[*i]->u.op]));
          /* FALL THROUGH TO PROCESS OPERATOR AS FUNCTION CALL */
      }
    }
    /*}}}*/
    /* FIDENT */ /*{{{*/
    case FIDENT:
    {
      if (fident == -2) fident = n[*i]->u.fident;
      ++(*i);
      if (n[*i] == NULLTOKEN || n[*i]->type != OPERATOR
          || n[*i]->u.op != OP)
        argc = -1; /* no parenthesised argument list follows the identifier */
      else /* parse arguments and closing paren of function call */ /*{{{*/
      {
        ++(*i);
        argc = 0;
        if (!(n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op == CP))
        /* parse at least one argument */ /*{{{*/
        {
          if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR && n[*i]->u.op == COMMA)
          /* empty argument */ /*{{{*/
          {
            argv[argc].type = EMPTY;
          }
          /*}}}*/
          else argv[argc] = term(n, i, meth);
          if (argv[argc].type == EEK) return argv[argc];
          ++argc;
          while (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR
                 && n[*i]->u.op == COMMA)
          /* parse the following argument */ /*{{{*/
          {
            ++(*i);
            if (argc < MAXARGC)
            {
              if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR &&
                  (n[*i]->u.op == COMMA || n[*i]->u.op == CP))
              {
                argv[argc].type = EMPTY;
              }
              else
              {
                argv[argc] = term(n, i, meth);
                if (argv[argc].type == EEK)
                {
                  /* release the arguments parsed so far, then hand the
                     error token itself back to the caller */
                  for (int pa = 0; pa < argc; ++pa) tfree(argv + pa);
                  return argv[argc];
                }
              }
            }
            else
            {
              duperror(&result, _("too many arguments"));
              for (int j = 0; j < argc; ++j) tfree(&argv[j]);
              return result;
            }
            ++argc;
          }
          /*}}}*/
        }
        /*}}}*/
        if (n[*i] == NULLTOKEN || n[*i]->type != OPERATOR || n[*i]->u.op != CP)
        /* ) expected */ /*{{{*/
        {
          for (int j = 0; j < argc; ++j) tfree(&argv[j]);
          duperror(&result, _(") expected"));
          return result;
        }
        /*}}}*/
        ++(*i);
      }
      /*}}}*/
      if (meth == FULL)
      {
        result = tfuncall(fident, argc, argv);
        /* To allow a function to return one of its arguments, we need
           to be sure not to free that argument.  The index is a signed int
           on purpose: argc is -1 when there was no argument list, and an
           unsigned comparison would turn that into a huge loop bound. */
        for (int j = 0; j < argc; ++j) tfree_protected(&argv[j], result);
        return result;
      }
      /* not evaluating: package the call as a FUNCALL token */
      result.type = FUNCALL;
      result.u.funcall.fident = fident;
      result.u.funcall.argc = argc;
      if (argc > 0)
      {
        result.u.funcall.argv = malloc(argc * sizeof(Token));
        for (int ai = 0; ai < argc; ++ai)
          result.u.funcall.argv[ai] = argv[ai];
      }
      else
      {
        /* no arguments to store; do not leave the pointer indeterminate */
        result.u.funcall.argv = NULL;
      }
      return result;
    }
    /*}}}*/
    /* FUNCALL */ /*{{{*/
    case FUNCALL:
      if (meth == FULL)
        result = tfuncall(n[*i]->u.funcall.fident, n[*i]->u.funcall.argc,
                          n[*i]->u.funcall.argv);
      else result = tcopy(*n[*i]);
      ++(*i);
      return result;
    /*}}}*/
    default: ; /* fall through */
  }
  duperror(&result, _("value expected"));
  return result;
}
/*}}}*/
/* powterm -- parse and evaluate a x^y term */ /*{{{*/
/*
 * Parse a primary followed by any number of "^ primary" parts, advancing *i
 * past the tokens consumed.  With meth == FULL each power is computed
 * immediately via tpow(); otherwise a single "^" FUNCALL token collecting the
 * base and all exponents (at most MAXARGC entries) is built.
 */
static Token powterm(Token *n[], int *i, EvalMethod meth)
{
  size_t exponents = 0; /* number of exponents stored in the FUNCALL so far */
  Token base = primary(n, i, meth);

  if (base.type == EEK) return base;
  for (;;)
  {
    Token *next = n[*i];
    Token expo;

    if (next == (Token*)0 || next->type != OPERATOR || next->u.op != POW)
      break;
    ++(*i);
    expo = primary(n, i, meth);
    if (meth == FULL)
    {
      Token value = tpow(base, expo);
      tfree(&base);
      tfree(&expo);
      if (value.type == EEK) return value;
      base = value;
      continue;
    }
    /* building a call tree rather than evaluating */
    if (expo.type == EEK)
    {
      tfree(&base);
      return expo;
    }
    if (exponents == 0)
    {
      /* first "^": turn the base into argument 0 of a new "^" call */
      Token first = base;
      base.type = FUNCALL;
      base.u.funcall.fident = identcode("^", 1);
      base.u.funcall.argc = 1;
      base.u.funcall.argv = malloc(MAXARGC * sizeof(Token));
      base.u.funcall.argv[0] = first;
    }
    if (exponents + 1 >= MAXARGC)
    {
      tfree(&base);
      tfree(&expo);
      duperror(&base, _("Exceeded maximum sequence length of ^"));
      return base;
    }
    ++exponents;
    ++(base.u.funcall.argc);
    base.u.funcall.argv[exponents] = expo;
  }
  return base;
}
/*}}}*/
/* piterm -- parse and evaluate a product/division/modulo term */ /*{{{*/
/*
 * Parse a powterm followed by any number of "*", "/" or modulo operations,
 * advancing *i past the tokens consumed.
 *
 * With meth == FULL every operation is computed immediately.  Otherwise a
 * FUNCALL tree is built: consecutive multiplications are gathered into one
 * "*" call holding up to MAXARGC arguments, while every other operator gets
 * a fresh two-argument call.
 *
 * Returns the resulting token, or an error token (type EEK).
 */
static Token piterm(Token *n[], int *i, EvalMethod meth)
{
  int mulident = identcode("*", 1);
  Token l;
  Operator op = CP; /* CP doubles as "no multiplicative operator pending" */
  bool first_funcall = true; /* true until this function has wrapped l itself */

  l = powterm(n, i, meth);
  if (l.type == EEK) return l;
  if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
  while (op == DIV || op == MUL || op == MOD)
  {
    Token r;

    ++(*i);
    r = powterm(n, i, meth);
    if (meth == FULL)
    {
      Token result;

      switch (op)
      {
        case MUL: result = tmul(l,r); break;
        case DIV: result = tdiv(l,r); break;
        case MOD: result = tmod(l,r); break;
        default: assert(0);
      }
      tfree(&l);
      tfree(&r);
      if (result.type == EEK) return result;
      l = result;
    }
    else
    {
      if (r.type == EEK)
      {
        tfree(&l);
        return r;
      }
      if (first_funcall || l.u.funcall.fident != mulident || op != MUL)
      {
        /* start a new call node with the old l as its first argument */
        first_funcall = false;
        Token tmp = l;
        /* the type tag must be switched as well, otherwise l keeps its
           previous type while its union already holds funcall data */
        l.type = FUNCALL;
        l.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op]));
        l.u.funcall.argc = 2;
        /* only "*" calls are extended later, so only they need full capacity */
        if (op == MUL) l.u.funcall.argv = malloc(MAXARGC * sizeof(Token));
        else l.u.funcall.argv = malloc(2*sizeof(Token));
        l.u.funcall.argv[0] = tmp;
        l.u.funcall.argv[1] = r;
      }
      else
      {
        /* l is a "*" call built above: append another factor */
        if (l.u.funcall.argc >= MAXARGC)
        {
          tfree(&r);
          tfree(&l);
          duperror(&l, _("Exceeded maximum sequence length of *"));
          return l;
        }
        l.u.funcall.argv[(l.u.funcall.argc)++] = r;
      }
    }
    if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
    else op = CP;
  }
  return l;
}
/*}}}*/
/* factor -- parse and evaluate a factor of sums/differences */ /*{{{*/
/*
 * Parse a piterm followed by any number of "+" or "-" operations, advancing
 * *i past the tokens consumed.
 *
 * With meth == FULL every operation is computed immediately.  Otherwise a
 * FUNCALL tree is built: consecutive additions are gathered into one "+"
 * call holding up to MAXARGC arguments, while each subtraction gets a fresh
 * two-argument call.
 *
 * Returns the resulting token, or an error token (type EEK).
 */
static Token factor(Token *n[], int *i, EvalMethod meth)
{
  int plusident = identcode("+", 1);
  Token l;
  Operator op = CP; /* CP doubles as "no additive operator pending" */
  bool first_funcall = true; /* true until this function has wrapped l itself */

  l = piterm(n, i, meth);
  if (l.type == EEK) return l;
  if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
  while (op == PLUS || op == MINUS)
  {
    Token r;

    ++(*i);
    r = piterm(n, i, meth);
    if (meth == FULL)
    {
      Token result = (op==PLUS ? tadd(l,r) : tsub(l,r));
      tfree(&l);
      tfree(&r);
      if (result.type == EEK) return result;
      l = result;
    }
    else
    {
      if (r.type == EEK)
      {
        tfree(&l);
        return r;
      }
      if (first_funcall || l.u.funcall.fident != plusident || op != PLUS)
      {
        /* start a new call node with the old l as its first argument */
        first_funcall = false;
        Token tmp = l;
        /* the type tag must be switched as well, otherwise l keeps its
           previous type while its union already holds funcall data */
        l.type = FUNCALL;
        l.u.funcall.fident = identcode(Op_Name[op], strlen(Op_Name[op]));
        l.u.funcall.argc = 2;
        /* only "+" calls are extended later, so only they need full capacity */
        if (op == PLUS) l.u.funcall.argv = malloc(MAXARGC * sizeof(Token));
        else l.u.funcall.argv = malloc(2*sizeof(Token));
        l.u.funcall.argv[0] = tmp;
        l.u.funcall.argv[1] = r;
      }
      else
      {
        /* l is a "+" call built above: append another summand */
        if (l.u.funcall.argc >= MAXARGC)
        {
          tfree(&r);
          tfree(&l);
          duperror(&l, _("Exceeded maximum sequence length of +"));
          return l;
        }
        l.u.funcall.argv[(l.u.funcall.argc)++] = r;
      }
    }
    if (n[*i] != NULLTOKEN && n[*i]->type == OPERATOR) op = n[*i]->u.op;
    else op = CP;
  }
  return l;
}
/*}}}*/
/* term -- parse and evaluate a relational term */ /*{{{*/
/*
 * Parse a factor, optionally followed by exactly one relational operator
 * (any operator in the range LT..NE) and a second factor; *i is advanced
 * past the tokens consumed.  With meth == FULL the comparison is computed,
 * otherwise a two-argument FUNCALL token named after the operator is built.
 */
static Token term(Token *n[], int *i, EvalMethod meth)
{
  Token lhs = factor(n, i, meth);
  Token *next;

  if (lhs.type == EEK) return lhs;

  /* a < b < c used to mean (a < b) < c, but that does not make sense really
     because there is not an ordering on bools (if we had a separate bool
     type). So restrict to a single binary relation; one can still use parens
     to get the old, odd behavior */
  next = n[*i];
  if (next == NULLTOKEN || next->type != OPERATOR) return lhs;
  if (!(next->u.op >= LT && next->u.op <= NE)) return lhs;

  Operator rel = next->u.op;
  Token rhs;

  ++(*i);
  rhs = factor(n, i, meth);

  if (meth != FULL)
  {
    /* building a call tree rather than evaluating */
    if (rhs.type == EEK)
    {
      tfree(&lhs);
      return rhs;
    }
    Token left_operand = lhs;
    lhs.type = FUNCALL;
    lhs.u.funcall.fident = identcode(Op_Name[rel], strlen(Op_Name[rel]));
    lhs.u.funcall.argc = 2;
    lhs.u.funcall.argv = malloc(2*sizeof(Token));
    lhs.u.funcall.argv[0] = left_operand;
    lhs.u.funcall.argv[1] = rhs;
    return lhs;
  }

  Token outcome;
  switch (rel)
  {
    case LT:      outcome = tlt(lhs, rhs); break;
    case LE:      outcome = tle(lhs, rhs); break;
    case GE:      outcome = tge(lhs, rhs); break;
    case GT:      outcome = tgt(lhs, rhs); break;
    case ISEQUAL: outcome = teq(lhs, rhs); break;
    case ABOUTEQ: outcome = tabouteq(lhs, rhs); break;
    case NE:      outcome = tne(lhs, rhs); break;
    default: assert(0);
  }
  tfree(&lhs);
  tfree(&rhs);
  return outcome;
}
/*}}}*/
/* eval -- parse and evaluate token sequence */ /*{{{*/
/*
 * Parse the whole NULLTOKEN-terminated token sequence n.
 *
 * Several terms written one after another without an operator between them
 * are concatenated: with meth == FULL via tconcat(), otherwise by collecting
 * them (at most MAXARGC) as the arguments of one "concat" FUNCALL token.
 *
 * Requires upd_sheet to be set (asserted).  Returns the resulting token, or
 * an error token (type EEK).
 */
Token eval(Token **n, EvalMethod meth)
{
  Token l;
  int i = 0;
  bool first_funcall = true; /* true until l has been wrapped in "concat" */

  assert(upd_sheet != (Sheet*)0);
  l = term(n, &i, meth);
  if (l.type == EEK) return l;
  while (n[i] != NULLTOKEN)
  {
    Token r = term(n, &i, meth);

    if (meth == FULL)
    {
      Token result = tconcat(l,r);
      tfree(&l);
      tfree(&r);
      if (result.type == EEK) return result;
      l = result;
    }
    else
    {
      if (r.type == EEK)
      {
        tfree(&l);
        return r;
      }
      if (first_funcall)
      {
        /* wrap the first term as argument 0 of a new "concat" call */
        first_funcall = false;
        Token tmp = l;
        /* the type tag must be switched too, otherwise l keeps its previous
           type while its union already holds funcall data */
        l.type = FUNCALL;
        l.u.funcall.fident = identcode("concat", 6);
        l.u.funcall.argc = 1;
        l.u.funcall.argv = malloc(MAXARGC*sizeof(Token));
        /* store the saved original term, not the half-built call itself */
        l.u.funcall.argv[0] = tmp;
      }
      if (l.u.funcall.argc >= MAXARGC)
      {
        tfree(&l);
        tfree(&r);
        duperror(&l, _("Exceeded max sequence lentgh of concatenated terms"));
        return l;
      }
      l.u.funcall.argv[(l.u.funcall.argc)++] = r;
    }
  }
  return l;
}
/*}}}*/
/* eval_safe -- like eval, but handles null pointer to token sequence */ /*{{{*/
/*
 * Wrapper around eval(): when n equals EMPTY_TVEC nothing is parsed and a
 * token whose type is EMPTY is returned; any other sequence is passed
 * straight on to eval() with the same method.
 */
Token eval_safe(Token **n, EvalMethod meth)
{
  if (n != EMPTY_TVEC) return eval(n, meth);

  Token blank;
  blank.type = EMPTY;
  return blank;
}