teapot-spreadsheet/src/common/scanner.c

417 lines
9.8 KiB
C

/* #includes */ /*{{{C}}}*//*{{{*/
#ifndef NO_POSIX_SOURCE
#undef _POSIX_SOURCE
#define _POSIX_SOURCE 1
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 2
#endif
#ifdef DMALLOC
#include "dmalloc.h"
#endif
#include <assert.h>
#include <ctype.h>
#include <float.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
extern double strtod(const char *nptr, char **endptr); /* SunOS 4 hack */
#include <string.h>
#include "default.h"
#include "func.h"
#include "main.h"
#include "misc.h"
#include "utf8.h"
#include "scanner.h"
/*}}}*/
const char *Type_Name[] =
{ [EMPTY] = "EMPTY", [STRING] = "STRING", [FLOAT] = "FLOAT", [INT] = "INT",
[OPERATOR] = "OPERATOR", [LIDENT] = "LABEL", [FIDENT] = "FUNCTION",
[LOCATION] = "LOCATION", [EEK] = "ERROR"
};
/* identcode -- look up a function name and return its index in tfunc */ /*{{{*/
/*
 * s does not have to be NUL-terminated; only its first len bytes are
 * looked at.  An entry of tfunc matches when its name is exactly len
 * characters long and those characters equal the bytes at s.  The table
 * is walked until the first entry whose name is the empty string.
 *
 * Returns the zero-based index of the matching entry, or -1 when the end
 * of the table is reached without a match.
 */
int identcode(const char *s, size_t len)
{
  int idx = 0;

  while (tfunc[idx].name[0] != '\0')
  {
    const char *candidate = tfunc[idx].name;

    /* compare lengths first so a mere prefix never counts as a hit */
    if (strlen(candidate) == len && strncmp(s, candidate, len) == 0) return idx;
    ++idx;
  }
  return -1;
}
/*}}}*/
/* loc_in_box -- returns true if test is in the box determined by b and c */
/*
 * b and c are two corners of the box and may be given in either order.
 * For each dimension from X up to, but not including, HYPER the
 * coordinate of test is rejected only when it is strictly smaller than
 * both corners or strictly larger than both; coordinates equal to a
 * corner therefore count as inside.
 */
bool loc_in_box(const Location test,
                const Location b, const Location c)
{
  Dimensions axis = X;

  while (axis < HYPER)
  {
    const bool below_both = test[axis] < b[axis] && test[axis] < c[axis];
    const bool above_both = test[axis] > b[axis] && test[axis] > c[axis];

    if (below_both || above_both) return false;
    ++axis;
  }
  return true;
}
/* charstring -- match quoted string and return token */ /*{{{*/
/*
 * Recognises a double-quoted string starting at *s.  Inside the quotes a
 * backslash makes the following character literal, so \" and \\ can be
 * embedded.
 *
 * On success a freshly allocated STRING token is returned whose u.string
 * holds the text with the surrounding quotes and the escaping backslashes
 * removed, and *s is advanced past the closing quote.  The caller owns
 * both the token and its string.
 *
 * A null pointer is returned, and *s is left unchanged, when the input
 * does not start with a quote, when the closing quote is missing, or when
 * memory cannot be allocated.
 */
static Token *charstring(const char **s)
{
  const char *start = *s;
  const char *p;
  const char *in;
  char *out;
  Token *tok;

  if (*start != '"') return (Token*)0;

  /* locate the closing quote, stepping over escaped characters */
  p = start + 1;
  while (*p != '\0' && *p != '"')
  {
    if (*p == '\\' && p[1] != '\0') p += 2;
    else ++p;
  }
  if (*p == '\0') return (Token*)0;
  ++p; /* p is now one past the closing quote */

  tok = malloc(sizeof *tok);
  if (tok == (Token*)0) return (Token*)0;
  /*
   * p-start counts both quotes, so it always has room for the unquoted
   * text plus the terminating NUL; escapes only make the text shorter.
   */
  tok->u.string = malloc((size_t)(p - start));
  if (tok->u.string == (char*)0)
  {
    free(tok);
    return (Token*)0;
  }
  tok->type = STRING;

  /* copy the text between the quotes, dropping each escaping backslash */
  out = tok->u.string;
  in = start + 1;
  while (in < p - 1)
  {
    if (*in == '\\') { *out++ = in[1]; in += 2; }
    else *out++ = *in++;
  }
  *out = '\0';

  *s = p;
  return tok;
}
/*}}}*/
/* integer -- match an unsigned integer and return token */ /*{{{*/
/*
 * Lets posnumber() convert the text at *s.  The match is accepted only if
 * posnumber() consumed at least one character and the character after
 * the number is neither '.' nor 'e'; those two would make the text the
 * start of a floating point literal, which is left for flt() to scan.
 *
 * NOTE(review): an upper-case 'E' is not excluded here, so "1E5" scans
 * as the integer 1 followed by an identifier -- confirm whether that is
 * intended.
 *
 * On success a freshly allocated INT token is returned and *s is advanced
 * past the number.  On a failed match or when memory cannot be allocated
 * a null pointer is returned and *s is left unchanged.
 */
static Token *integer(const char **s)
{
  const char *start = *s;
  const char *end = start;
  long value;
  Token *tok;

  value = posnumber(start, &end);
  if (end == start || *end == '.' || *end == 'e') return (Token*)0;

  tok = malloc(sizeof *tok);
  if (tok == (Token*)0) return (Token*)0;
  tok->type = INT;
  tok->u.integer = value;

  /* advance the caller's cursor only once the token really exists */
  *s = end;
  return tok;
}
/*}}}*/
/* flt -- match a floating point number */ /*{{{*/
/*
 * Converts the longest prefix of *s that strtod() accepts.  The match is
 * rejected when strtod() consumed nothing or when dblfinite() returns a
 * non-null pointer for the converted value.
 *
 * On success a freshly allocated FLOAT token is returned and *s is
 * advanced past the number.  On a rejected match or when memory cannot be
 * allocated a null pointer is returned and *s is left unchanged.
 */
static Token *flt(const char **s)
{
  const char *start = *s;
  char *end;
  double value;
  Token *tok;

  value = strtod(start, &end);
  if (end == start) return (Token*)0;
  if (dblfinite(value) != (const char*)0) return (Token*)0;

  tok = malloc(sizeof *tok);
  if (tok == (Token*)0) return (Token*)0;
  tok->type = FLOAT;
  tok->u.flt = value;

  /* advance the caller's cursor only once the token really exists */
  *s = end;
  return tok;
}
/*}}}*/
/* op -- match an op and return token */ /*{{{*/
/*
 * Recognises the one-character operators + - * / % ( ) , ^ < > and the
 * two-character operators <= >= == ~= !=.  A lone '=', '~' or '!' is not
 * an operator.
 *
 * On success a freshly allocated OPERATOR token is returned and *s is
 * advanced past the operator.  When nothing matches or memory cannot be
 * allocated a null pointer is returned and *s is left unchanged.
 */
static Token *op(const char **s)
{
  const char *p = *s;
  /* only look at p[1] when p[0] is not the terminating NUL */
  const int eq_follows = (p[0] != '\0' && p[1] == '=');
  size_t len = 1;
  Operator code;
  Token *tok;

  switch (p[0])
  {
    case '+': code = PLUS; break;
    case '-': code = MINUS; break;
    case '*': code = MUL; break;
    case '/': code = DIV; break;
    case '%': code = MOD; break;
    case '(': code = OP; break;
    case ')': code = CP; break;
    case ',': code = COMMA; break;
    case '^': code = POW; break;
    case '<':
      if (eq_follows) { code = LE; len = 2; } else code = LT;
      break;
    case '>':
      if (eq_follows) { code = GE; len = 2; } else code = GT;
      break;
    case '=':
      if (!eq_follows) return (Token*)0;
      code = ISEQUAL; len = 2;
      break;
    case '~':
      if (!eq_follows) return (Token*)0;
      code = ABOUTEQ; len = 2;
      break;
    case '!':
      if (!eq_follows) return (Token*)0;
      code = NE; len = 2;
      break;
    default: return (Token*)0;
  }

  tok = malloc(sizeof *tok);
  if (tok == (Token*)0) return (Token*)0;
  tok->type = OPERATOR;
  tok->u.op = code;

  /* advance the caller's cursor only once the token really exists */
  *s = p + len;
  return tok;
}
/*}}}*/
/* ident_member -- tell whether c may appear in an identifier */ /*{{{*/
/*
 * Letters and the characters _ @ & . $ are allowed everywhere; digits are
 * allowed everywhere except in the first position (first != 0).  The
 * character is converted to unsigned char before it is handed to the
 * <ctype.h> functions, because passing a negative plain char (any byte
 * >= 0x80 where char is signed) to them is undefined.
 */
static int ident_member(char c, int first)
{
  const unsigned char uc = (unsigned char)c;

  if (isalpha(uc)) return 1;
  if (!first && isdigit(uc)) return 1;
  return c=='_' || c=='@' || c=='&' || c=='.' || c=='$';
}
/*}}}*/
/* ident -- match an identifier and return token */ /*{{{*/
/*
 * Scans the longest run of identifier characters at *s and looks it up
 * with identcode().  If identcode() knows the name, a FIDENT token
 * carrying the returned index is produced; otherwise a LIDENT token is
 * produced whose u.lident is a freshly allocated NUL-terminated copy of
 * the name.  The caller owns the token and, for LIDENT, the copy.
 *
 * On success *s is advanced past the identifier.  When *s does not start
 * an identifier or memory cannot be allocated, a null pointer is returned
 * and *s is left unchanged.
 */
static Token *ident(const char **s)
{
  const char *begin = *s;
  const char *end;
  size_t len;
  int fident;
  Token *result;

  if (!ident_member(*begin, 1)) return (Token*)0;
  end = begin + 1;
  while (ident_member(*end, 0)) ++end;
  len = (size_t)(end - begin);

  result = malloc(sizeof *result);
  if (result == (Token*)0) return (Token*)0;

  fident = identcode(begin, len);
  if (fident == -1)
  {
    result->u.lident = malloc(len + 1);
    if (result->u.lident == (char*)0)
    {
      free(result);
      return (Token*)0;
    }
    memcpy(result->u.lident, begin, len);
    result->u.lident[len] = '\0';
    result->type = LIDENT;
  }
  else
  {
    result->type = FIDENT;
    result->u.fident = fident;
  }

  /* advance the caller's cursor only once the token really exists */
  *s = end;
  return result;
}
/*}}}*/
/* scan_next -- skip blanks and match one token at *r */ /*{{{*/
/*
 * Tries the matchers in the fixed order string, operator, integer, float,
 * identifier and returns the first token produced, or a null pointer if
 * none of them matches.  *r is advanced past the blanks and past the
 * matched token.
 */
static Token *scan_next(const char **r)
{
  Token *tok;

  while (**r == ' ') ++(*r);
  tok = charstring(r);
  if (tok == (Token*)0) tok = op(r);
  if (tok == (Token*)0) tok = integer(r);
  if (tok == (Token*)0) tok = flt(r);
  if (tok == (Token*)0) tok = ident(r);
  return tok;
}
/*}}}*/
/* scan_discard -- release a token created by scan_next */ /*{{{*/
/*
 * STRING and LIDENT tokens own a heap-allocated text (see charstring()
 * and ident()); the other token types produced by the matchers carry no
 * separately allocated data.  A null pointer is accepted and ignored.
 */
static void scan_discard(Token *tok)
{
  if (tok == (Token*)0) return;
  if (tok->type == STRING) free(tok->u.string);
  else if (tok->type == LIDENT) free(tok->u.lident);
  free(tok);
}
/*}}}*/
/* scan -- scan string into tokens */ /*{{{*/
/*
 * Works in two passes over the text at *s: the first pass counts the
 * tokens and detects text that no matcher accepts, the second pass builds
 * the result.
 *
 * Returns a freshly allocated, null-terminated vector of freshly
 * allocated tokens; the caller owns all of it.  If some part of the text
 * cannot be tokenised, a null pointer is returned and *s is set to the
 * offending position.  A null pointer is also returned, with *s
 * unchanged, when the vector cannot be allocated.
 *
 * Blanks after the last token make the counting pass reserve one extra
 * slot, which the second pass fills with a null pointer; the vector is
 * then simply terminated one element early.
 */
Token **scan(const char **s)
{
  Token **vec;
  const char *r;
  int count, j;

  /* pass 1: count tokens */ /*{{{*/
  r = *s;
  while (*r == ' ') ++r;
  for (count = 0; *r != '\0'; ++count)
  {
    const char *mark = r;

    /* the token is only needed for counting: free it again, do not leak it */
    scan_discard(scan_next(&r));
    if (mark == r) { *s = r; return (Token**)0; }
  }
  /*}}}*/
  /* allocate token space */ /*{{{*/
  vec = malloc(sizeof *vec * ((size_t)count + 1));
  if (vec == (Token**)0) return (Token**)0;
  /*}}}*/
  /* pass 2: store tokens */ /*{{{*/
  r = *s;
  for (j = 0; j < count; ++j) vec[j] = scan_next(&r);
  vec[j] = (Token*)0;
  /*}}}*/
  return vec;
}
/*}}}*/
/* printtok_append -- bounded string append used by printtok */ /*{{{*/
/*
 * Copies as much of src as fits into dest starting at offset cur, always
 * leaving room for and writing a terminating NUL.  Nothing is written
 * when cur is already at or beyond size.
 *
 * Returns cur plus the full length of src, i.e. the offset the text would
 * have reached in an unbounded buffer, so that the caller can recognise
 * truncation by comparing the result with size.
 */
static size_t printtok_append(char *dest, size_t size, size_t cur, const char *src)
{
  const size_t len = strlen(src);

  if (cur < size)
  {
    const size_t room = size - cur - 1;
    const size_t n = len < room ? len : room;

    memcpy(dest + cur, src, n);
    dest[cur + n] = '\0';
  }
  return cur + len;
}
/*}}}*/
/* printtok -- print a single token, passed by address, although not changed */ /*{{{*/
/*
 * Writes a textual form of *tok into dest, which has room for size bytes
 * including the terminating NUL.  Output that does not fit is truncated.
 *
 *   field_width    if non-zero and mbslen(dest) exceeds it, the output is
 *                  replaced by field_width '#' characters
 *   quote_strings  if non-zero, STRING tokens are wrapped in double quotes
 *                  and embedded quotes and backslashes are backslash-escaped
 *   use_scientific if non-zero, FLOAT tokens use %e instead of %f
 *   precision      digits after the decimal point for FLOAT tokens; -1
 *                  selects DBL_DIG-2 digits and, in %f notation, strips
 *                  trailing zeros down to one digit after the point
 *   verbose_error  if non-zero, EEK tokens print ": " and the error text
 *                  after the translated word ERROR
 *   tok            token to print; NULLTOKEN prints as the empty string
 *
 * Returns the length of the text written, size if the text had to be
 * truncated, or field_width if the '#' fill was applied.  An EMPTY token
 * writes a single NUL and counts as length 1.  If size is 0 nothing is
 * written and 0 is returned.
 */
size_t printtok(char* dest, size_t size, size_t field_width,
                int quote_strings, int use_scientific,
                int precision, int verbose_error, Token *tok)
{
  size_t cur = 0;

  if (debug_level > 2) {
    /* size_t arguments need %zu; %d is undefined where size_t is wider than int */
    printf("..Entering printtok; bufsize %zu, field_width %zu, qs %d, us %d, prec %d, verr %d\n", size, field_width, quote_strings, use_scientific, precision, verbose_error);
  }
  /* without this guard size-1 below would wrap around and index dest[-1] */
  if (size == 0) return 0;

  if (tok != NULLTOKEN) switch (tok->type)
  {
    /* EMPTY */ /*{{{*/
    case EMPTY: dest[cur++] = '\0'; break;
    /*}}}*/
    /* STRING */ /*{{{*/
    case STRING:
    {
      const char *str = tok->u.string;

      if (quote_strings && cur<size) dest[cur++] = '"';
      for (; cur<size && *str != '\0'; ++str)
      {
        /* cur<size holds here, so the escape character always fits */
        if (quote_strings && (*str == '"' || *str=='\\')) dest[cur++] = '\\';
        if (cur<size) dest[cur++] = *str;
      }
      if (quote_strings && cur<size) dest[cur++] = '"';
      break;
    }
    /*}}}*/
    /* INT */ /*{{{*/
    case INT:
    {
      /* about one decimal digit per three bits, plus sign, rounding and NUL */
      char buf[sizeof(long)*CHAR_BIT/3 + 3];

      (void)snprintf(buf, sizeof buf, "%ld", tok->u.integer);
      cur = printtok_append(dest, size, cur, buf);
      break;
    }
    /*}}}*/
    /* FLOAT */ /*{{{*/
    case FLOAT:
    {
      char buf[1024];
      int written;
      size_t len;

      written = snprintf(buf, sizeof buf, use_scientific ? "%.*e" : "%.*f",
                         precision == -1 ? DBL_DIG-2 : precision,
                         tok->u.flt);
      if (written < 0) { buf[0] = '\0'; len = 0; }
      else if ((size_t)written >= sizeof buf) len = sizeof buf - 1; /* snprintf truncated */
      else len = (size_t)written;

      if (!use_scientific && precision == -1)
      {
        /* strip trailing zeros but keep one digit after the decimal point */
        while (len > 1 && buf[len-1] == '0' && buf[len-2] != '.') buf[--len] = '\0';
      }
      while (len > 0 && buf[len-1] == ' ') buf[--len] = '\0';
      cur = printtok_append(dest, size, cur, buf);
      break;
    }
    /*}}}*/
    /* OPERATOR */ /*{{{*/
    case OPERATOR:
    {
      /* indexed by tok->u.op, so the order has to follow the Operator enumeration */
      static const char *ops[]={ "+", "-", "*", "/", "(", ")", ",", "<", "<=", ">=", ">", "==", "~=", "!=", "^", "%" };

      if ((size-cur)>1)
      {
        dest[cur++] = *ops[tok->u.op];
        if (*(ops[tok->u.op]+1) && size>cur) dest[cur++] = *(ops[tok->u.op]+1);
      }
      break;
    }
    /*}}}*/
    /* LIDENT */ /*{{{*/
    case LIDENT:
    {
      cur = printtok_append(dest, size, cur, tok->u.lident);
      break;
    }
    /*}}}*/
    /* FIDENT */ /*{{{*/
    case FIDENT:
    {
      if (debug_level > 2) {
        printf("...Found function [%s].\n", tfunc[tok->u.fident].name);
      }
      cur = printtok_append(dest, size, cur, tfunc[tok->u.fident].name);
      break;
    }
    /*}}}*/
    /* LOCATION */ /*{{{*/
    case LOCATION:
    {
      char buf[60];

      (void)snprintf(buf, sizeof buf, "&(%d,%d,%d)", tok->u.location[0], tok->u.location[1], tok->u.location[2]);
      cur = printtok_append(dest, size, cur, buf);
      break;
    }
    /*}}}*/
    /* EEK */ /*{{{*/
    case EEK:
    {
      /*
       * Each piece goes through the bounded append: once cur has passed
       * size nothing more is written, where size-cur-1 used to wrap
       * around and let strncpy run far beyond the buffer.
       */
      cur = printtok_append(dest, size, cur, _("ERROR"));
      if (verbose_error)
      {
        cur = printtok_append(dest, size, cur, ": ");
        cur = printtok_append(dest, size, cur, tok->u.err);
      }
      break;
    }
    /*}}}*/
    /* default */ /*{{{*/
    default: assert(0);
    /*}}}*/
  }

  /* terminate, and report size as the length if the text was truncated */
  if (cur<size) dest[cur] = 0;
  else
  {
    dest[size-1] = 0;
    cur = size;
  }
  /* text wider than the field is replaced by a row of '#' */
  if (field_width && mbslen(dest) > field_width) {
    for (cur = 0; cur < field_width; ++cur) dest[cur] = '#';
    dest[cur] = 0;
  }
  return cur;
}
/*}}}*/
/* print -- print token sequence */ /*{{{*/
/*
 * Prints the tokens of the null-terminated vector n one after another
 * into s, which has room for size bytes including the terminating NUL.
 * Printing stops as soon as the buffer is full.  quote, scientific and
 * precision are passed on to printtok(); verbose error text is always off
 * and no per-token field width is applied.
 *
 * If chars is non-zero and mbslen(s) exceeds it, the whole output is
 * replaced by chars '#' characters.  An EMPTY_TVEC yields the empty
 * string.  If size is 0 nothing is written at all.
 */
void print(char *s, size_t size, size_t chars, int quote, int scientific, int precision, Token **n)
{
  size_t cur = 0;

  /* without this guard size-1 would wrap around and s[size-1] would be s[-1] */
  if (size == 0) return;

  if (n != EMPTY_TVEC)
  {
    /* cur+1 < size keeps at least one byte free for the terminating NUL */
    for (; cur+1 < size && (*n) != NULLTOKEN; ++n)
      cur += printtok(s+cur, size-cur, 0, quote, scientific, precision, 0, *n);
  }
  if (cur < size) s[cur] = 0;
  else s[size-1] = 0;

  if (chars && mbslen(s) > chars) {
    for (cur = 0; cur < chars; ++cur) s[cur] = '#';
    s[cur] = 0;
  }
}
/*}}}*/