teapot-spreadsheet/src/common/scanner.c

220 lines
5.0 KiB
C

/* #includes */ /*{{{C}}}*//*{{{*/
#ifndef NO_POSIX_SOURCE
#undef _POSIX_SOURCE
#define _POSIX_SOURCE 1
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 2
#endif
#ifdef DMALLOC
#include "dmalloc.h"
#endif
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
extern double strtod(const char *nptr, char **endptr); /* SunOS 4 hack */
extern long double strtold(const char *nptr, char **endptr); /* SunOS 4 hack */
#include <string.h>
#include "default.h"
#include "func.h"
#include "main.h"
#include "misc.h"
#include "scanner.h"
/*}}}*/
/* charstring -- match quoted string and return token */ /*{{{*/
/*
 * Recognise a double-quoted string literal starting at *s.
 *
 * Inside the literal a backslash escapes the character that follows it, so
 * \" does not end the string.  On a match, *s is moved past the closing
 * quote and a newly malloc'ed STRING token is returned; its u.string is a
 * separately malloc'ed copy of the text with the surrounding quotes and the
 * escaping backslashes removed.  If *s does not start with a quote, or the
 * closing quote is missing, *s is left unchanged and a null pointer is
 * returned.
 */
static Token *charstring(char **s)
{
  char *const start = *s;
  char *end;
  char *src;
  char *dst;
  Token *tok;

  if (*start != '"') return (Token*)0;

  /* look for the closing quote, stepping over escaped characters */
  end = start + 1;
  while (*end != '\0' && *end != '"')
  {
    if (*end == '\\' && end[1] != '\0') end += 2;
    else ++end;
  }
  if (*end == '\0')
  {
    /* unterminated literal */
    *s = start;
    return 0;
  }
  ++end; /* now one past the closing quote */
  *s = end;

  tok = malloc(sizeof(Token));
  tok->type = STRING;
  /* The raw length including both quotes is always enough room for the
     cleaned text plus its terminator; it may waste a few bytes. */
  dst = tok->u.string = malloc((size_t)(end - start));
  for (src = start + 1; src < end - 1; )
  {
    if (*src == '\\')
    {
      /* keep only the escaped character */
      *dst++ = src[1];
      src += 2;
    }
    else *dst++ = *src++;
  }
  *dst = '\0';
  return tok;
}
/*}}}*/
/* scan_integer -- match an integer and return token */ /*{{{*/
/*
 * Try to read an integer literal at *s.
 *
 * The text is converted with STRTOINT using base 0 (STRTOINT is defined
 * outside this file; it is used here like strtol, i.e. the second argument
 * receives the position after the converted digits).  The match is refused
 * when nothing was converted, or when the character following the digits
 * shows that the text is really a floating point literal: a '.', a decimal
 * exponent 'e'/'E', or -- for hexadecimal input -- a binary exponent
 * 'p'/'P'.
 *
 * On a match, *s is advanced past the digits and a newly malloc'ed INT token
 * is returned.  Otherwise *s is restored and NULLTOKEN is returned.
 */
Token *scan_integer(char **s)
{
  char *r = *s;
  /* Both spellings of the hex prefix must be recognised, otherwise a
     literal such as 0X1P3 would be split into the integer 1 and an
     identifier instead of being left to the floating point scanner. */
  bool ishex = r[0] == '0' && (r[1] == 'x' || r[1] == 'X');
  IntT i = STRTOINT(r, s, 0);
  if (*s == r
      || **s == '.' || **s == 'e' || **s == 'E'
      || (ishex && (**s == 'p' || **s == 'P')))
  { /* either doesn't look like a number, or looks like a float, not an int */
    *s = r;
    return NULLTOKEN;
  }
  /* looks like an int */
  Token *n = malloc(sizeof(Token));
  n->type = INT;
  n->u.integer = i;
  return n;
}
/*}}}*/
/* scan_flt -- match a floating point number */ /*{{{*/
/*
 * Try to read a floating point literal at *s using STRTOFLT.
 *
 * The match is accepted only if some text was converted and dblfinite()
 * returns NULL for the converted value.  In that case *s is advanced past
 * the literal and a newly malloc'ed FLOAT token is returned.  Otherwise *s
 * is left where it was and NULLTOKEN is returned.
 */
Token *scan_flt(char **s)
{
  char *const start = *s;
  char *stop;
  FltT value = STRTOFLT(start, &stop);
  Token *tok;

  /* nothing converted, or dblfinite() objects to the value */
  if (stop == start || dblfinite(value) != NULL)
  {
    *s = start;
    return NULLTOKEN;
  }

  *s = stop;
  tok = malloc(sizeof(Token));
  tok->type = FLOAT;
  tok->u.flt = value;
  return tok;
}
/*}}}*/
/* op -- match an op and return token */ /*{{{*/
/*
 * Recognise a one- or two-character operator at *s.
 *
 * Single characters: + - * / % ( ) , ^ < >
 * Two characters:    <= >= == ~= !=
 * A lone '=', '~' or '!' is not an operator.
 *
 * On a match, *s is advanced past the operator and a newly malloc'ed
 * OPERATOR token is returned.  Otherwise *s is left unchanged and a null
 * pointer is returned.
 */
static Token *op(char **s)
{
  const char *p = *s;
  Operator code;
  size_t width = 1; /* number of characters the operator occupies */
  Token *tok;

  switch (p[0])
  {
    case '+': code = PLUS; break;
    case '-': code = MINUS; break;
    case '*': code = MUL; break;
    case '/': code = DIV; break;
    case '%': code = MOD; break;
    case '(': code = OP; break;
    case ')': code = CP; break;
    case ',': code = COMMA; break;
    case '^': code = POW; break;
    case '<':
      if (p[1] == '=') { code = LE; width = 2; }
      else code = LT;
      break;
    case '>':
      if (p[1] == '=') { code = GE; width = 2; }
      else code = GT;
      break;
    case '=':
      if (p[1] != '=') return (Token*)0;
      code = ISEQUAL;
      width = 2;
      break;
    case '~':
      if (p[1] != '=') return (Token*)0;
      code = ABOUTEQ;
      width = 2;
      break;
    case '!':
      if (p[1] != '=') return (Token*)0;
      code = NE;
      width = 2;
      break;
    default:
      return (Token*)0;
  }

  tok = malloc(sizeof(Token));
  tok->type = OPERATOR;
  tok->u.op = code;
  *s += width;
  return tok;
}
/*}}}*/
/* scan_ident -- match an identifier and return token */ /*{{{*/
/* ident_start_char -- may c begin an identifier? */
static int ident_start_char(char c)
{
  /* The <ctype.h> functions are only defined for values representable as
     unsigned char (and EOF); passing a negative plain char is undefined. */
  return isalpha((unsigned char)c)
         || c=='_' || c=='@' || c=='&' || c=='.' || c=='$';
}

/*
 * Try to read an identifier at *s.
 *
 * An identifier starts with a letter or one of _ @ & . $ and continues with
 * any of those characters or digits.  Depending on its spelling the result
 * is one of:
 *   - "and" / "or":  an OPERATOR token holding LAND / LOR,
 *   - a name for which identcode() returns something other than
 *     NOT_A_FUNCTION:  a FIDENT token holding that code,
 *   - anything else:  a LIDENT token whose u.lident is a malloc'ed,
 *     NUL-terminated copy of the name.
 *
 * On a match, *s is advanced past the identifier and the newly malloc'ed
 * token is returned.  Otherwise *s is left unchanged and NULLTOKEN is
 * returned.
 */
Token *scan_ident(char **s)
{
  const char *begin = *s;
  size_t len;
  Token *result;
  int fident;

  if (!ident_start_char(*begin)) return NULLTOKEN;

  ++(*s);
  while (ident_start_char(**s) || isdigit((unsigned char)**s)) ++(*s);
  len = (size_t)(*s - begin);

  result = malloc(sizeof(Token));
  if (len == 3 && strncmp(begin, "and", 3) == 0)
  {
    result->type = OPERATOR;
    result->u.op = LAND;
  }
  else if (len == 2 && strncmp(begin, "or", 2) == 0)
  {
    result->type = OPERATOR;
    result->u.op = LOR;
  }
  else if ((fident = identcode(begin, len)) == NOT_A_FUNCTION)
  {
    result->type = LIDENT;
    result->u.lident = malloc(len + 1);
    /* the length is known exactly, so a plain copy plus terminator will do */
    memcpy(result->u.lident, begin, len);
    result->u.lident[len] = '\0';
  }
  else
  {
    result->type = FIDENT;
    result->u.fident = fident;
  }
  return result;
}
/*}}}*/
/* scan -- scan string into tokens */ /*{{{*/
/* scan_one -- match a single token of any kind at *s */
static Token *scan_one(char **s)
{
  Token *n;

  /* The order of the attempts is significant: op() claims a leading '+' or
     '-' before the number scanners see it, and scan_integer() gets first
     refusal before scan_flt(). */
  n = charstring(s);
  if (n == NULLTOKEN) n = op(s);
  if (n == NULLTOKEN) n = scan_integer(s);
  if (n == NULLTOKEN) n = scan_flt(s);
  if (n == NULLTOKEN) n = scan_ident(s);
  return n;
}

/* scan_discard -- release a token produced by scan_one */
static void scan_discard(Token *n)
{
  /* Of the tokens built in this file only STRING and LIDENT own a separately
     allocated buffer.
     NOTE(review): if the project already provides a token destructor, it
     could be used here instead -- confirm. */
  if (n->type == STRING) free(n->u.string);
  else if (n->type == LIDENT) free(n->u.lident);
  free(n);
}

/*
 * Split the text at *s into tokens.
 *
 * Blanks between tokens are skipped.  The text is scanned twice: once to
 * count the tokens, once to store them in an array of exactly the right
 * size.  Tokens created while counting are released again immediately, so
 * the counting pass does not leak them.
 *
 * Returns a malloc'ed array of token pointers terminated by NULLTOKEN; the
 * array and the tokens in it belong to the caller.  *s is not modified in
 * this case.  If some text cannot be matched by any token scanner, *s is set
 * to the offending position and EMPTY_TVEC is returned.
 */
Token **scan(char **s)
{
  /* variables */ /*{{{*/
  Token **na,*n;
  char *r;
  size_t i,j;
  /*}}}*/
  /* compute number of tokens */ /*{{{*/
  i = 0;
  r = *s;
  for (;;)
  {
    while (*r == ' ') ++r;
    if (*r == '\0') break;
    n = scan_one(&r);
    if (n == NULLTOKEN) { *s = r; return EMPTY_TVEC; }
    scan_discard(n);
    ++i;
  }
  /*}}}*/
  /* allocate token space */ /*{{{*/
  na = malloc(sizeof(Token*)*(i+1));
  /*}}}*/
  /* store tokens */ /*{{{*/
  r = *s;
  for (j = 0; j < i; ++j)
  {
    while (*r == ' ') ++r;
    na[j] = scan_one(&r);
  }
  na[i] = NULLTOKEN;
  /*}}}*/
  return na;
}
/*}}}*/