teapot-spreadsheet/src/common/scanner.c

290 lines
7.1 KiB
C

/* #includes */ /*{{{C}}}*//*{{{*/
#ifndef NO_POSIX_SOURCE
#undef _POSIX_SOURCE
#define _POSIX_SOURCE 1
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 2
#endif
#ifdef DMALLOC
#include "dmalloc.h"
#endif
#include <assert.h>
#include <ctype.h>
extern char *strdup(const char* s);
extern double strtod(const char *nptr, char **endptr); /* SunOS 4 hack */
extern long double strtold(const char *nptr, char **endptr); /* SunOS 4 hack */
#include <string.h>
#include "default.h"
#include "func.h"
#include "main.h"
#include "misc.h"
#include "scanner.h"
/*}}}*/
const char *Type_Name[] =
{ [EMPTY] = "EMPTY", [STRING] = "STRING", [FLOAT] = "FLOAT", [INT] = "INT",
[OPERATOR] = "OPERATOR", [LIDENT] = "LABEL", [FIDENT] = "FUNCTION",
[LOCATION] = "LOCATION", [FUNCALL] = "FUNCTION-CALL", [EEK] = "ERROR",
[BOOL] = "BOOL"
};
/* Op_Name -- source spelling of each operator, indexed by the operator code */
const char *Op_Name[] =
{
  /* arithmetic */
  [PLUS]    = "+",
  [MINUS]   = "-",
  [MUL]     = "*",
  [DIV]     = "/",
  [POW]     = "^",
  [MOD]     = "%",
  /* punctuation */
  [OP]      = "(",
  [CP]      = ")",
  [COMMA]   = ",",
  /* comparison */
  [LT]      = "<",
  [LE]      = "<=",
  [GE]      = ">=",
  [GT]      = ">",
  [ISEQUAL] = "==",
  [ABOUTEQ] = "~=",
  [NE]      = "!=",
  /* logical (spelled as words) */
  [LAND]    = "and",
  [LOR]     = "or"
};
/* loc_in_box -- returns true if test is in the box determined by b and c */
/*
 * For every dimension from X up to (but excluding) HYPER the coordinate of
 * test must lie between the matching coordinates of b and c, bounds included.
 * b and c may be given in either order: a coordinate is rejected only when it
 * is strictly below both corners or strictly above both corners.
 */
bool loc_in_box(const Location test,
                const Location b, const Location c)
{
  Dimensions axis = X;

  while (axis < HYPER)
  {
    const bool below_both = test[axis] < b[axis] && test[axis] < c[axis];
    const bool above_both = test[axis] > b[axis] && test[axis] > c[axis];

    if (below_both || above_both) return false;
    ++axis;
  }
  return true;
}
/* cleartoken - Initialize all of the memory of a token */ /*{{{*/
/*
 * Marks tok as EMPTY, stores 0.0 in the float member and then zeroes the
 * first three location slots.  The stores are kept in exactly this order
 * because the members of tok->u may share storage (u looks like a union --
 * confirm against scanner.h), in which case the later stores win.
 */
void cleartoken(Token* tok)
{
  tok->type = EMPTY;
  tok->u.flt = 0.0;
  for (int slot = 0; slot < 3; ++slot)
  {
    tok->u.location[slot] = 0;
  }
}
/* tok_matches - return true if l and r appear to be the same token */ /*{{{*/
/*
 * Two tokens match when their types are equal and the payload selected by
 * that type compares equal with ==.  Pointer payloads (string, label, error
 * message, function-call argument vector) are compared by address only; the
 * data they point to is never inspected.  A type that is not handled below
 * trips the assertion and, with assertions disabled, reports "no match".
 */
bool tok_matches(const Token* l, const Token *r)
{
  if (l->type == r->type)
  {
    switch (l->type)
    {
      case EMPTY:
        return true;

      case STRING:
        return l->u.string == r->u.string;

      case FLOAT:
        return l->u.flt == r->u.flt;

      case INT:
        return l->u.integer == r->u.integer;

      case OPERATOR:
        return l->u.op == r->u.op;

      case LIDENT:
        return l->u.lident == r->u.lident;

      case FIDENT:
        return l->u.fident == r->u.fident;

      case LOCATION:
        if (l->u.location[X] != r->u.location[X]) return false;
        if (l->u.location[Y] != r->u.location[Y]) return false;
        return l->u.location[Z] == r->u.location[Z];

      case EEK:
        return l->u.err == r->u.err;

      case FUNCALL:
        if (l->u.funcall.fident != r->u.funcall.fident) return false;
        if (l->u.funcall.argc != r->u.funcall.argc) return false;
        return l->u.funcall.argv == r->u.funcall.argv;

      case BOOL:
        return l->u.bl == r->u.bl;
    }
    /* only reached for a type code missing from the switch above */
    assert(0);
  }
  return false;
}
/* duperror - Sets tok to an error and strdups the message into place */
/*
 * tok becomes an EEK token whose message is a strdup'd copy of erro; whatever
 * tok held before is overwritten without being released.  The updated token
 * is also returned by value, so the returned copy and *tok share the same
 * message pointer.
 */
Token duperror(Token* tok, const char* erro)
{
  char *message;

  tok->type = EEK;
  message = strdup(erro);
  tok->u.err = message;
  return *tok;
}
/* charstring -- match quoted string and return token */ /*{{{*/
/*
 * Recognises a double-quoted literal at *s.  Inside the quotes a backslash
 * followed by any character other than the terminating NUL stands for that
 * following character.  On a match *s is moved past the closing quote and a
 * newly malloc'd STRING token holding the unquoted, unescaped text is
 * returned.  If *s does not start with a quote, or the closing quote is
 * missing, a null pointer is returned and *s is left where it was.
 */
static Token *charstring(char **s)
{
  char *const quote = *s;
  char *cur;
  char *src;
  char *dst;
  Token *tok;

  if (*quote != '"') return (Token*)0;

  /* locate the closing quote, stepping over escaped characters */
  cur = quote + 1;
  while (*cur != '\0' && *cur != '"')
  {
    if (*cur == '\\' && cur[1] != '\0') cur += 2;
    else ++cur;
  }
  if (*cur == '\0') return 0;   /* unterminated: *s was never moved */
  ++cur;                        /* consume the closing quote */
  *s = cur;

  tok = malloc(sizeof(Token));
  tok->type = STRING;
  /* The quoted span is the text plus two quotes, so it always leaves room
     for the text and its NUL; escapes only make the text shorter. */
  dst = tok->u.string = malloc((size_t)(cur - quote));
  src = quote + 1;
  while (src < cur - 1)
  {
    if (*src == '\\')
    {
      *dst++ = src[1];
      src += 2;
    }
    else
    {
      *dst++ = *src++;
    }
  }
  *dst = '\0';
  return tok;
}
/*}}}*/
/* scan_integer -- match an integer and return token */ /*{{{*/
/*
 * Converts the text at *s with STRTOINT using base 0.  The match is accepted
 * only if at least one character was consumed and the next character does not
 * continue a floating point literal: '.', 'e' or 'E'.  Rejecting on 'E' as
 * well as 'e' keeps "1E5" from being split into the integer 1 followed by an
 * identifier, so that it reaches the float scanner just like "1e5" does.
 *
 * Returns a newly malloc'd INT token with *s moved past the digits, or
 * NULLTOKEN with *s left unchanged.
 */
Token *scan_integer(char **s)
{
  char *r = *s;
  IntT i = STRTOINT(r, s, 0);

  if (*s == r || **s == '.' || **s == 'e' || **s == 'E')
  {
    *s = r;
    return NULLTOKEN;
  }

  Token *n = malloc(sizeof *n);
  n->type = INT;
  n->u.integer = i;
  return n;
}
/*}}}*/
/* scan_flt -- match a floating point number */ /*{{{*/
/*
 * Converts the text at *s with STRTOFLT.  The match is accepted when at least
 * one character was consumed and dblfinite() returns NULL for the value
 * (presumably meaning "finite" -- confirm against its definition).
 *
 * Returns a newly malloc'd FLOAT token with *s moved past the number, or
 * NULLTOKEN with *s left unchanged.
 */
Token *scan_flt(char **s)
{
  char *const start = *s;
  char *stop;
  const FltT value = STRTOFLT(start, &stop);

  /* nothing consumed, or a value dblfinite() objects to */
  if (stop == start || dblfinite(value) != NULL)
  {
    *s = start;
    return NULLTOKEN;
  }

  *s = stop;
  Token *tok = malloc(sizeof(Token));
  tok->type = FLOAT;
  tok->u.flt = value;
  return tok;
}
/*}}}*/
/* op -- match an op and return token */ /*{{{*/
/*
 * Recognises the one- and two-character operators at *s.  '<' and '>' stand
 * alone or combine with a following '='; '=', '~' and '!' are operators only
 * when followed by '='.  On a match *s is moved past the operator and a newly
 * malloc'd OPERATOR token is returned; otherwise a null pointer is returned
 * and *s is left unchanged.
 */
static Token *op(char **s)
{
  const char *p = *s;
  Operator code;
  int width = 1;          /* characters the operator occupies */
  Token *tok;

  /* p[1] is only inspected for characters that can start a two-character
     operator, so the byte after the terminating NUL is never read */
  switch (p[0])
  {
    case '+': code = PLUS;  break;
    case '-': code = MINUS; break;
    case '*': code = MUL;   break;
    case '/': code = DIV;   break;
    case '%': code = MOD;   break;
    case '(': code = OP;    break;
    case ')': code = CP;    break;
    case ',': code = COMMA; break;
    case '^': code = POW;   break;

    case '<':
      if (p[1] == '=') { code = LE; width = 2; }
      else code = LT;
      break;

    case '>':
      if (p[1] == '=') { code = GE; width = 2; }
      else code = GT;
      break;

    case '=':
      if (p[1] != '=') return (Token*)0;
      code = ISEQUAL;
      width = 2;
      break;

    case '~':
      if (p[1] != '=') return (Token*)0;
      code = ABOUTEQ;
      width = 2;
      break;

    case '!':
      if (p[1] != '=') return (Token*)0;
      code = NE;
      width = 2;
      break;

    default:
      return (Token*)0;
  }

  tok = malloc(sizeof(Token));
  tok->type = OPERATOR;
  tok->u.op = code;
  *s += width;
  return tok;
}
/*}}}*/
/* scan_ident -- match an identifier and return token */ /*{{{*/
/* ident_start_char -- true if c may begin an identifier */
static bool ident_start_char(char c)
{
  /* <ctype.h> functions require a value representable as unsigned char (or
     EOF); a plain char may be negative, so convert before the call */
  return isalpha((unsigned char)c)
      || c == '_' || c == '@' || c == '&' || c == '.' || c == '$';
}

/* ident_body_char -- true if c may continue an identifier */
static bool ident_body_char(char c)
{
  return ident_start_char(c) || isdigit((unsigned char)c);
}

/*
 * An identifier starts with a letter or one of _ @ & . $ and continues with
 * those characters or digits.  The matched text is classified as follows:
 *   - exactly "and" / "or"  -> OPERATOR token (LAND / LOR)
 *   - a name identcode() knows -> FIDENT token carrying the returned code
 *   - anything else -> LIDENT token owning a malloc'd copy of the name
 *
 * Returns a newly malloc'd token with *s moved past the identifier, or
 * NULLTOKEN with *s unchanged when no identifier starts at *s.
 */
Token *scan_ident(char **s)
{
  const char *begin;
  Token *result;
  size_t len;
  int fident;

  if (!ident_start_char(**s)) return NULLTOKEN;

  begin = *s;
  ++(*s);
  while (ident_body_char(**s)) ++(*s);
  len = (size_t)(*s - begin);

  result = malloc(sizeof *result);
  if (len == 3 && strncmp(begin, "and", 3) == 0)
  {
    result->type = OPERATOR;
    result->u.op = LAND;
  }
  else if (len == 2 && strncmp(begin, "or", 2) == 0)
  {
    result->type = OPERATOR;
    result->u.op = LOR;
  }
  else if ((fident = identcode(begin, len)) == NOT_A_FUNCTION)
  {
    result->type = LIDENT;
    result->u.lident = malloc(len + 1);
    /* the length is known exactly, so copy it and terminate explicitly */
    memcpy(result->u.lident, begin, len);
    result->u.lident[len] = '\0';
  }
  else
  {
    result->type = FIDENT;
    result->u.fident = fident;
  }
  return result;
}
/*}}}*/
/* scan_discard -- release a token produced by one of the scanners in this file */
static void scan_discard(Token *tok)
{
  if (tok == NULLTOKEN) return;
  /* only STRING and LIDENT tokens built here own extra heap memory */
  if (tok->type == STRING) free(tok->u.string);
  else if (tok->type == LIDENT) free(tok->u.lident);
  free(tok);
}

/* scan_one -- try each scanner in turn at *r; NULLTOKEN if none matches */
static Token *scan_one(char **r)
{
  Token *n = charstring(r);

  if (n == NULLTOKEN) n = op(r);
  if (n == NULLTOKEN) n = scan_integer(r);
  if (n == NULLTOKEN) n = scan_flt(r);
  if (n == NULLTOKEN) n = scan_ident(r);
  return n;
}

/* scan -- scan string into tokens */ /*{{{*/
/*
 * Tokenizes the NUL-terminated text at *s in two passes: the first counts the
 * tokens and validates the input, the second builds the result.
 *
 * On success returns a malloc'd array of malloc'd tokens terminated by
 * NULLTOKEN, and *s is left unchanged.  If some position cannot be scanned,
 * returns EMPTY_TVEC with *s pointing at the offending character.
 *
 * Tokens created by the counting pass are only needed to advance the cursor,
 * so they are released immediately instead of being leaked.
 */
Token **scan(char **s)
{
  /* variables */ /*{{{*/
  Token **na,*n;
  char *r;
  /*}}}*/

  /* compute number of tokens */ /*{{{*/
  r = *s;
  while (*r == ' ') ++r;
  int i = 0;
  for (; *r != '\0'; ++i)
  {
    const char *before = r;

    while (*r == ' ') ++r;
    n = scan_one(&r);
    scan_discard(n);
    /* no blank skipped and no token consumed: nothing can match here */
    if (before == r) { *s = r; return EMPTY_TVEC; }
  }
  /*}}}*/
  /* allocate token space */ /*{{{*/
  na = malloc(sizeof(Token*)*(i+1));
  /*}}}*/
  /* store tokens */ /*{{{*/
  r = *s;
  while (*r==' ') ++r;
  for (int j = 0; j < i; ++j)
  {
    while (*r == ' ') ++r;
    na[j] = scan_one(&r);
  }
  na[i] = NULLTOKEN;
  /*}}}*/
  return na;
}
/*}}}*/