teapot-spreadsheet/src/common/scanner.c

/* #includes */ /*{{{C}}}*//*{{{*/
#ifndef NO_POSIX_SOURCE
#undef _POSIX_SOURCE
#define _POSIX_SOURCE   1
#undef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 2
#endif

#ifdef DMALLOC
#include "dmalloc.h"
#endif

#include <assert.h>
#include <ctype.h>
extern char *strdup(const char* s);
extern double strtod(const char *nptr, char **endptr); /* SunOS 4 hack */
extern long double strtold(const char *nptr, char **endptr); /* SunOS 4 hack */
#include <stdlib.h>
#include <string.h>


#include "default.h"
#include "func.h"
#include "main.h"
#include "misc.h"
#include "scanner.h"
/*}}}*/

const char *Type_Name[] =
  { [EMPTY] = "EMPTY", [STRING] = "STRING", [FLOAT] = "FLOAT", [INT] = "INT",
    [OPERATOR] = "OPERATOR", [LIDENT] = "LABEL", [FIDENT] = "FUNCTION",
    [LOCATION] = "LOCATION", [FUNCALL] = "FUNCTION-CALL", [EEK] = "ERROR",
    [BOOL] = "BOOL"
  };

/* Op_Name -- source spelling of each operator, indexed by the operator code */
const char *Op_Name[] =
{
  /* arithmetic */
  [PLUS]    = "+",
  [MINUS]   = "-",
  [MUL]     = "*",
  [DIV]     = "/",
  [MOD]     = "%",
  [POW]     = "^",
  /* grouping and argument separation */
  [OP]      = "(",
  [CP]      = ")",
  [COMMA]   = ",",
  /* comparison */
  [LT]      = "<",
  [LE]      = "<=",
  [GE]      = ">=",
  [GT]      = ">",
  [ISEQUAL] = "==",
  [ABOUTEQ] = "~=",
  [NE]      = "!=",
  /* logical connectives */
  [LAND]    = "and",
  [LOR]     = "or"
};

/* loc_in_box  -- returns true if test is in the box determined by b and c */
/*
 * b and c may be given in either order: a coordinate is rejected only when
 * it lies strictly below both of them or strictly above both of them, so
 * the faces of the box count as inside.  Every dimension from X up to (but
 * not including) HYPER is checked.
 */
bool loc_in_box(const Location test,
		       const Location b, const Location c)
{
  Dimensions dim = X;

  while (dim < HYPER)
  {
    const bool below_both = test[dim] < b[dim] && test[dim] < c[dim];
    const bool above_both = test[dim] > b[dim] && test[dim] > c[dim];

    if (below_both || above_both) return false;
    ++dim;
  }
  return true;
}

/* cleartoken - reset a token to EMPTY with a zeroed payload */ /*{{{*/
/*
 * Sets the type to EMPTY, stores 0.0 in u.flt and then zeroes the first
 * three slots of u.location.
 */
void cleartoken(Token* tok)
{
  tok->type = EMPTY;
  tok->u.flt = 0.0;
  for (int slot = 0; slot < 3; ++slot) tok->u.location[slot] = 0;
}
/*}}}*/

/* tok_matches - return true if l and r appear to be the same token */ /*{{{*/
/*
 * Tokens of different type never match.  For equal types the payload
 * members are compared with ==.  This is a shallow test: the string,
 * lident and err members are compared as pointers, not by the text they
 * point to, and a FUNCALL matches only if fident, argc and the argv
 * member itself are all equal.
 */
bool tok_matches(const Token* l, const Token *r)
{
  /* -1 marks "no case handled this type", so the assert below still
     fires for a token type that is missing from the switch. */
  int same = -1;

  if (l->type != r->type) return false;

  switch (l->type)
  {
    case EMPTY:    same = 1; break;
    case STRING:   same = (l->u.string == r->u.string); break;
    case FLOAT:    same = (l->u.flt == r->u.flt); break;
    case INT:      same = (l->u.integer == r->u.integer); break;
    case OPERATOR: same = (l->u.op == r->u.op); break;
    case LIDENT:   same = (l->u.lident == r->u.lident); break;
    case FIDENT:   same = (l->u.fident == r->u.fident); break;
    case LOCATION:
      same = (l->u.location[X] == r->u.location[X]
              && l->u.location[Y] == r->u.location[Y]
              && l->u.location[Z] == r->u.location[Z]);
      break;
    case EEK:      same = (l->u.err == r->u.err); break;
    case FUNCALL:
      same = (l->u.funcall.fident == r->u.funcall.fident
              && l->u.funcall.argc == r->u.funcall.argc
              && l->u.funcall.argv == r->u.funcall.argv);
      break;
    case BOOL:     same = (l->u.bl == r->u.bl); break;
  }

  if (same < 0)
  {
    assert(0);
    return false;
  }
  return same != 0;
}
/*}}}*/

/* duperror - Sets tok to an error and strdups the message into place */
/*
 * The token type becomes EEK and u.err receives a strdup() copy of erro.
 * The result of strdup() is stored as is, without a check for failure.
 * The updated token is also returned by value.
 */
Token duperror(Token* tok, const char* erro)
{
  char *copy = strdup(erro);

  tok->u.err = copy;
  tok->type = EEK;
  return *tok;
}

/* charstring -- match quoted string and return token */ /*{{{*/
/*
 * Recognizes a string literal that starts with a double quote at *s and
 * ends with the next unescaped double quote.  A backslash makes the
 * following character part of the string; the backslash itself and the
 * enclosing quotes are not stored.
 *
 * On success a malloc'ed STRING token is returned, its u.string is a
 * malloc'ed NUL-terminated copy of the cleaned text, and *s points just
 * behind the closing quote.
 *
 * A null pointer is returned, with *s unchanged, if the text does not
 * start with a double quote, if the closing quote is missing, or if
 * memory cannot be allocated.
 */
static Token *charstring(char **s)
{
  char *const open = *s;
  char *close;
  char *from;
  char *to;
  Token *n;

  if (*open != '"') return (Token*)0;

  /* locate the closing quote, stepping over backslash escapes */
  close = open + 1;
  while (*close != '\0' && *close != '"')
  {
    if (*close == '\\' && close[1] != '\0') close += 2;
    else ++close;
  }
  if (*close == '\0') return (Token*)0;

  n = malloc(sizeof *n);
  if (n == (Token*)0) return (Token*)0;
  /* The cleaned text is never longer than the raw text between the quotes
     (close-open-1 characters), so close-open bytes leave room for the NUL. */
  n->u.string = malloc((size_t)(close - open));
  if (n->u.string == (char*)0)
  {
    free(n);
    return (Token*)0;
  }
  n->type = STRING;

  /* Copy without the quotes.  The search above guarantees that every
     backslash in front of close is followed by a character that also
     lies in front of close. */
  to = n->u.string;
  from = open + 1;
  while (from < close)
  {
    if (*from == '\\') { *to++ = from[1]; from += 2; }
    else *to++ = *from++;
  }
  *to = '\0';

  *s = close + 1;
  return n;
}
/*}}}*/

/* scan_integer -- match an integer and return token */ /*{{{*/
/*
 * Converts the text at *s with STRTOINT using base 0.  The match is
 * rejected if nothing was converted or if the converted digits are
 * directly followed by '.', 'e' or 'E', because the text is then the
 * beginning of a floating point literal and must be left to scan_flt.
 *
 * On success a malloc'ed INT token is returned and *s points behind the
 * number.  Otherwise NULLTOKEN is returned and *s is unchanged; this also
 * happens if memory cannot be allocated.
 *
 * NOTE(review): the result of STRTOINT is stored without any range check.
 */
Token *scan_integer(char **s)
{
  char *r = *s;
  Token *n;
  IntT i = STRTOINT(r, s, 0);

  if (*s == r || **s == '.' || **s == 'e' || **s == 'E')
  {
    *s = r;
    return NULLTOKEN;
  }

  n = malloc(sizeof *n);
  if (n == NULLTOKEN)
  {
    *s = r;
    return NULLTOKEN;
  }
  n->type = INT;
  n->u.integer = i;
  return n;
}
/*}}}*/

/* scan_flt        -- match a floating point number */ /*{{{*/
/*
 * Converts the text at *s with STRTOFLT.  The match is accepted if at
 * least one character was converted and dblfinite() returns NULL for the
 * converted value.
 *
 * On success a malloc'ed FLOAT token is returned and *s points behind the
 * number.  Otherwise NULLTOKEN is returned and *s is unchanged; this also
 * happens if memory cannot be allocated.
 */
Token *scan_flt(char **s)
{
  char *start = *s;
  char *end;
  Token *n;
  FltT x = STRTOFLT(start, &end);

  /* *s is only moved once the token really exists */
  if (end == start || dblfinite(x) != NULL) return NULLTOKEN;

  n = malloc(sizeof *n);
  if (n == NULLTOKEN) return NULLTOKEN;
  n->type = FLOAT;
  n->u.flt = x;
  *s = end;
  return n;
}
/*}}}*/

/* op   -- match an op and return token */ /*{{{*/
/*
 * Recognizes the one character operators + - * / % ( ) , ^ < > and the
 * two character operators <= >= == ~= != at *s.  A lone '=', '~' or '!'
 * is not an operator.
 *
 * On success a malloc'ed OPERATOR token is returned and *s points behind
 * the operator.  Otherwise a null pointer is returned and *s is
 * unchanged; this also happens if memory cannot be allocated.
 */
static Token *op(char **s)
{
  const char *p = *s;
  Operator code;
  size_t len = 1;
  Token *n;

  switch (p[0])
  {
    case '+': code = PLUS; break;
    case '-': code = MINUS; break;
    case '*': code = MUL; break;
    case '/': code = DIV; break;
    case '%': code = MOD; break;
    case '(': code = OP; break;
    case ')': code = CP; break;
    case ',': code = COMMA; break;
    case '^': code = POW; break;
    /* p[0] is not NUL in the cases below, so reading p[1] is safe */
    case '<': if (p[1] == '=') { code = LE; len = 2; } else code = LT; break;
    case '>': if (p[1] == '=') { code = GE; len = 2; } else code = GT; break;
    case '=': if (p[1] != '=') return (Token*)0; code = ISEQUAL; len = 2; break;
    case '~': if (p[1] != '=') return (Token*)0; code = ABOUTEQ; len = 2; break;
    case '!': if (p[1] != '=') return (Token*)0; code = NE; len = 2; break;
    default: return (Token*)0;
  }

  n = malloc(sizeof *n);
  if (n == (Token*)0) return (Token*)0;
  n->type = OPERATOR;
  n->u.op = code;
  /* consume the operator only after the token has been built */
  *s += len;
  return n;
}
/*}}}*/
/* scan_ident_start -- true if c may be the first character of an identifier */
static bool scan_ident_start(char c)
{
  /* <ctype.h> functions need a value representable as unsigned char;
     passing a negative plain char is undefined behaviour. */
  return isalpha((unsigned char)c)
         || c == '_' || c == '@' || c == '&' || c == '.' || c == '$';
}

/* scan_ident_part -- true if c may follow the first character of an identifier */
static bool scan_ident_part(char c)
{
  return scan_ident_start(c) || isdigit((unsigned char)c);
}

/* scan_ident      -- match an identifier and return token */ /*{{{*/
/*
 * An identifier starts with a letter or one of _ @ & . $ and continues
 * with any of those characters or digits.  The words "and" and "or"
 * become OPERATOR tokens (LAND, LOR).  Any other word is looked up with
 * identcode(): if that yields NOT_A_FUNCTION the result is a LIDENT token
 * holding a malloc'ed NUL-terminated copy of the word, otherwise it is a
 * FIDENT token holding the code returned by identcode().
 *
 * On success the malloc'ed token is returned and *s points behind the
 * identifier.  Otherwise NULLTOKEN is returned and *s is unchanged; this
 * also happens if memory cannot be allocated.
 */
Token *scan_ident(char **s)
{
  char *const begin = *s;
  char *end = begin;
  size_t len;
  Token *result;

  if (!scan_ident_start(*begin)) return NULLTOKEN;
  do ++end; while (scan_ident_part(*end));
  len = (size_t)(end - begin);

  result = malloc(sizeof *result);
  if (result == NULLTOKEN) return NULLTOKEN;

  if (len == 3 && strncmp(begin, "and", 3) == 0)
  {
    result->type = OPERATOR;
    result->u.op = LAND;
  }
  else if (len == 2 && strncmp(begin, "or", 2) == 0)
  {
    result->type = OPERATOR;
    result->u.op = LOR;
  }
  else
  {
    const int fident = identcode(begin, len);

    if (fident == NOT_A_FUNCTION)
    {
      char *name = malloc(len + 1);

      if (name == NULL)
      {
        free(result);
        return NULLTOKEN;
      }
      /* begin..end holds no NUL, so exactly len bytes are copied */
      memcpy(name, begin, len);
      name[len] = '\0';
      result->type = LIDENT;
      result->u.lident = name;
    }
    else
    {
      result->type = FIDENT;
      result->u.fident = fident;
    }
  }

  *s = end;
  return result;
}
/*}}}*/

/* scan_one_token -- try each token scanner in turn at *r */
static Token *scan_one_token(char **r)
{
  Token *n = charstring(r);

  if (n == NULLTOKEN) n = op(r);
  if (n == NULLTOKEN) n = scan_integer(r);
  if (n == NULLTOKEN) n = scan_flt(r);
  if (n == NULLTOKEN) n = scan_ident(r);
  return n;
}

/* scan_discard_token -- free a token created by the scanners in this file */
static void scan_discard_token(Token *tok)
{
  if (tok == NULLTOKEN) return;
  /* STRING and LIDENT are the only token types for which the scanners
     allocate memory besides the token itself. */
  if (tok->type == STRING) free(tok->u.string);
  else if (tok->type == LIDENT) free(tok->u.lident);
  free(tok);
}

/* scan       -- scan string into tokens */ /*{{{*/
/*
 * Splits the NUL-terminated text at *s into tokens.  Blanks between
 * tokens are skipped.  At each position the scanners are tried in the
 * order string, operator, integer, float, identifier.
 *
 * On success the result is a malloc'ed vector of malloc'ed tokens that is
 * terminated by NULLTOKEN, and *s is left unchanged.  Text without any
 * token yields a vector that holds only the terminator.
 *
 * If some text cannot be turned into a token, everything collected so far
 * is freed, *s is set to the offending position and EMPTY_TVEC is
 * returned.  EMPTY_TVEC is also returned if memory runs out.
 *
 * The tokens are collected in a single pass into a vector that grows on
 * demand, so no token is allocated merely to be counted.
 */
Token **scan(char **s)
{
  Token **na;
  size_t count = 0;      /* tokens stored in na so far */
  size_t capacity = 16;  /* slots allocated in na */
  char *r = *s;
  bool failed = false;

  na = malloc(capacity * sizeof *na);
  if (na == NULL) return EMPTY_TVEC;

  while (*r == ' ') ++r;
  while (!failed && *r != '\0')
  {
    char *const start = r;
    Token *n;

    while (*r == ' ') ++r;
    n = scan_one_token(&r);
    if (r == start)
    {
      /* neither blanks nor a token could be consumed: error at r */
      failed = true;
    }
    else if (n != NULLTOKEN)
    {
      /* always keep one slot free for the terminating NULLTOKEN */
      if (count + 1 == capacity)
      {
        Token **bigger = realloc(na, 2 * capacity * sizeof *na);

        if (bigger == NULL)
        {
          scan_discard_token(n);
          r = start;
          failed = true;
        }
        else
        {
          na = bigger;
          capacity *= 2;
        }
      }
      if (!failed) na[count++] = n;
    }
    /* otherwise only blanks were consumed; the next round either ends
       the loop or reports the error at the character that follows */
  }

  if (failed)
  {
    while (count > 0) scan_discard_token(na[--count]);
    free(na);
    *s = r;
    return EMPTY_TVEC;
  }

  na[count] = NULLTOKEN;
  return na;
}
/*}}}*/