feat: Allow expressions in region functions and add count() accumulator

2023-04-29 19:16:44 -04:00 · 2023-04-29 19:16:44 -04:00 · a5a008a8a5
commit a5a008a8a5
parent eb5d576349
3 changed files with 430 additions and 445 deletions
--- a/doc/teapot.lyx
+++ b/doc/teapot.lyx
@ -6169,9 +6169,8 @@ Functions
 \end_layout

 \begin_layout Standard
-This section documents all available functions in alphabetical order (except
- for the short addressing/value fetching functions, which are listed together
- at the beginning).
+This section documents all available functions.
+ Functions are listed within each section in alphabetical order.
 The functions are described in a C-like notation; you don't have to write
 the types when you use the function in a formula.
 For example, use 
@ -6186,7 +6185,38 @@ This section documents all available functions in alphabetical order (except
 If no type is given for the result of a function, it means the result type
 depends on the arguments.
 Brackets mark optional arguments.
- 
+\end_layout
+
+\begin_layout Subsubsection
+Addressing/value fetching functions
+\end_layout
+
+\begin_layout Standard
+All of these functions come in pairs, an 
+\begin_inset Quotes eld
+\end_inset
+
+addressing
+\begin_inset Quotes erd
+\end_inset
+
+ one whose value is a 
+\emph on
+location
+\emph default
+, and a corresponding 
+\begin_inset Quotes eld
+\end_inset
+
+fetching
+\begin_inset Quotes erd
+\end_inset
+
+ one that produces the 
+\emph on
+value
+\emph default
+ of the cell at the corresponding location.
 \end_layout

 \begin_layout Description
@ -6665,15 +6695,310 @@ X&
 \series default
 ()
 \family default
- as well, which takes exactly the same arguments with the same meanings,
- but it is rarely needed.
- It is provided for completeness.
+ as well, which takes exactly the same arguments with the same meanings
+ and returns the corresponding location, rather than the value there, but
+ it is rarely needed.
+ It is provided for completeness of the pairing described in the introduction
+ to this section.
 \begin_inset Newline newline
 \end_inset

 See the FAQ below for further discussion of cell references.
 \end_layout

+\begin_layout Subsubsection
+Accumulation functions
+\end_layout
+
+\begin_layout Standard
+All of these functions accumulate an entire array of values in some way.
+ They differ only in how the values are accumulated, and otherwise have
+ exactly the same behavior/signature, namely:
+\end_layout
+
+\begin_layout Description
+
+\series bold
+\emph on
+accumulator
+\family sans
+\series default
+\emph default
+(
+\family default
+\series medium
+[location
+\emph on
+
+\begin_inset space ~
+\end_inset
+
+l1
+\emph default
+[,
+\begin_inset space ~
+\end_inset
+
+location
+\emph on
+
+\begin_inset space ~
+\end_inset
+
+l2
+\emph default
+[,
+\emph on
+
+\begin_inset space ~
+\end_inset
+
+expr
+\emph default
+]]])
+\emph on
+
+\begin_inset space ~
+\end_inset
+
+
+\emph default
+|
+\emph on
+
+\begin_inset space ~
+\end_inset
+
+
+\series bold
+accumulator
+\series medium
+\emph default
+(
+\emph on
+v1
+\family roman
+,
+\family default
+
+\begin_inset space ~
+\end_inset
+
+v2,
+\begin_inset space ~
+\end_inset
+
+...
+\emph default
+) 
+\series default
+The first form evaluates to the accumulation of 
+\emph on
+expr
+\emph default
+ computed for each location in the block defined by corner locations 
+\emph on
+l1
+\emph default
+ and 
+\emph on
+l2
+\emph default
+.
+ (Note that negative coordinates in the block corners are simply interpreted
+ as zero.) The expression 
+\emph on
+expr
+\emph default
+ defaults to 
+\begin_inset Quotes eld
+\end_inset
+
+
+\series bold
+@
+\series default
+
+\begin_inset Quotes erd
+\end_inset
+
+ – in other words, it accumulates the values in that block.
+ The location 
+\emph on
+l2
+\emph default
+ defaults to 
+\emph on
+l1
+\emph default
+, corresponding to a one-cell block, and 
+\emph on
+l1
+\emph default
+ in turn defaults to the current location.
+ The second form simply accumulates all of the argument values.
+\end_layout
+
+\begin_layout Standard
+The available accumulators are:
+\end_layout
+
+\begin_layout Description
+
+\series medium
+int
+\begin_inset space ~
+\end_inset
+
+
+\series default
+count Returns the number of accumulated values that are true when converted
+ to a boolean value.
+ For a block defined by corner locations 
+\emph on
+l1
+\emph default
+ and 
+\emph on
+l2
+\emph default
+, 
+\begin_inset Quotes eld
+\end_inset
+
+count(
+\emph on
+l1,l2
+\emph default
+)
+\begin_inset Quotes erd
+\end_inset
+
+ is essentially a shorthand for 
+\begin_inset Quotes eld
+\end_inset
+
+sum(
+\emph on
+l1, l2,
+\emph default
+ int(bool(@)))
+\begin_inset Quotes erd
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Description
+max In the first form, returns the 
+\emph on
+location
+\emph default
+ within the block of the maximum value being accumulated; to get the maximum
+ value occurring in the block with corners 
+\emph on
+l1
+\emph default
+ and 
+\emph on
+l2
+\emph default
+, use 
+\begin_inset Quotes eld
+\end_inset
+
+@(max(
+\emph on
+l1
+\emph default
+, 
+\emph on
+l2
+\emph default
+))
+\begin_inset Quotes erd
+\end_inset
+
+ instead.
+ In the second form, simply returns the maximum of the argument values.
+ Recall that in comparisons with the corresponding types, an empty cell
+ corresponds to 0, 0.0, or 
+\begin_inset Quotes eld
+\end_inset
+
+
+\begin_inset Quotes erd
+\end_inset
+
+, as needed.
+ This accumulator returns an error if it encounters values that are not
+ comparable (like a string and an integer).
+\end_layout
+
+\begin_layout Description
+min Just like 
+\family sans
+max
+\family default
+ but for the minimum of the accumulated values.
+\end_layout
+
+\begin_layout Description
+
+\series medium
+int
+\begin_inset space ~
+\end_inset
+
+
+\series default
+n Returns the number of accumulated values that are not empty.
+ For a block defined by corner locations 
+\emph on
+l1
+\emph default
+ and 
+\emph on
+l2
+\emph default
+, 
+\begin_inset Quotes eld
+\end_inset
+
+n(
+\emph on
+l1,l2
+\emph default
+)
+\begin_inset Quotes erd
+\end_inset
+
+ is essentially a shorthand for 
+\begin_inset Quotes eld
+\end_inset
+
+sum(
+\emph on
+l1, l2,
+\emph default
+ int(-is(@,empty)))
+\begin_inset Quotes erd
+\end_inset
+
+.
+\end_layout
+
+\begin_layout Description
+sum Returns the sum of the accumulated values (recall for strings this is
+ the concatenation).
+ This accumulator returns an error if it encounters values that cannot be added
+ (like a string and an integer).
+\end_layout
+
+\begin_layout Subsubsection
+All other functions
+\end_layout
+
 \begin_layout Description

 \series medium
@ -8177,302 +8502,6 @@ is
 to convert three integers to a location.
 \end_layout

-\begin_layout Description
-
-\series medium
-location
-\begin_inset space ~
-\end_inset
-
-
-\series default
-max
-\series medium
-(location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l1
-\emph default
-,
-\begin_inset space ~
-\end_inset
-
-location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l2
-\emph default
-)
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\emph default
-|
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\series default
-\emph default
-max
-\series medium
-(
-\emph on
-v1
-\emph default
-,
-\begin_inset space ~
-\end_inset
-
-
-\emph on
-v2
-\emph default
-,
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\emph default
-...)
-\series default
- evaluates to the maximum in the same way min does for the minimum.
- 
-\end_layout
-
-\begin_layout Description
-
-\series medium
-location
-\begin_inset space ~
-\end_inset
-
-
-\series default
-min
-\series medium
-(location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l1
-\emph default
-,
-\begin_inset space ~
-\end_inset
-
-location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l2
-\emph default
-)
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\emph default
-|
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\series default
-\emph default
-min
-\series medium
-(
-\emph on
-v1
-\emph default
-,
-\begin_inset space ~
-\end_inset
-
-
-\emph on
-v2
-\emph default
-,
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\emph default
-...)
-\series default
- The first form evaluates to the location of the minimum of all values in
- the block marked by the corners pointed to by 
-\emph on
-l1
-\emph default
- and 
-\emph on
-l2
-\emph default
-.
- Note that the empty cell is equal to 0, 0.0 and "", so if the first minimum
- is an empty cell, the result will be a pointer to this cell, too.
- If you are not interested in the location of the minimum but the value
- itself, use @(min(
-\emph on
-l1
-\emph default
-, 
-\emph on
-l2
-\emph default
-)).
- The second form simply returns the smallest of the specified values, returning
- an error if it encounters two that are not comparable (like a string and
- an integer).
-\end_layout
-
-\begin_layout Description
-
-\series medium
-int
-\begin_inset space ~
-\end_inset
-
-
-\series default
-n
-\series medium
-([location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l1
-\emph default
-[,
-\begin_inset space ~
-\end_inset
-
-location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l2
-\emph default
-])
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\emph default
-|
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\series default
-\emph default
-n
-\series medium
-(
-\family roman
-\emph on
-v1,
-\family default
-
-\begin_inset space ~
-\end_inset
-
-v2,
-\begin_inset space ~
-\end_inset
-
-...
-\emph default
-)
-\series default
- The first form evaluates to the number of non-empty cells in the block
- with corners at location
-\emph on
-s l1
-\emph default
- and 
-\emph on
-l2
-\emph default
-.
- Location 
-\emph on
-l2
-\emph default
- defaults to 
-\emph on
-l1
-\emph default
-; i.e., with a single location argument 
-\family sans
-\series bold
-n
-\family default
-\series medium
-(
-\family sans
-\series default
-\emph on
-l1
-\family default
-\series medium
-\emph default
-)
-\series default
- just tests whether the cell at 
-\emph on
-l1
-\emph default
- is empty.
- Location 
-\emph on
-l1
-\emph default
- defaults to the current location.
- If any dimension of either location is negative, that component is simply
- taken as 0.
- In other words, locations off the sheet are considered empty, but it is
- not an error to access them.
-\begin_inset Newline newline
-\end_inset
-
-The second form simply returns the number of its arguments which are nonempty.
-\end_layout
-
 \begin_layout Description
 number
 \series medium
@ -9117,87 +9146,6 @@ y
 If is omitted, the substring proceeds to the end of the string.
 \end_layout

-\begin_layout Description
-sum
-\series medium
-(location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l1
-\emph default
-,
-\begin_inset space ~
-\end_inset
-
-location
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-l2
-\emph default
-)
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\emph default
-|
-\emph on
-
-\begin_inset space ~
-\end_inset
-
-
-\series default
-\emph default
-sum
-\series medium
-(
-\family roman
-\emph on
-v1,
-\family default
-
-\begin_inset space ~
-\end_inset
-
-
-\family roman
-v2,
-\family default
-
-\begin_inset space ~
-\end_inset
-
-
-\family roman
-...
-\emph default
-)
-\family default
- 
-\series default
-The first form evaluates to the sum of all values in the block with corners
- at locations 
-\emph on
-l1
-\emph default
- and 
-\emph on
-l2
-\emph default
-.
- The second form simply adds all of its arguments.
- 
-\end_layout
-
 \begin_layout Description

 \series medium
--- a/src/common/func.c
+++ b/src/common/func.c
@ -1175,68 +1175,88 @@ typedef void (*RegFuncUpdt)(FunctionIdentifier id, Location *loc, Token *tok,
 			    const Location *newloc, const Token* newtok);
 typedef void (*RegFuncFinl)(FunctionIdentifier id, Location *loc, Token *tok);

-/* region_func -- apply an operation over a whole region */
-static Token region_func(RegFuncInit init, RegFuncUpdt updt, RegFuncFinl finl,
-			 FunctionIdentifier id, int argc, const Token argv[])
+/* region_macro -- accumulate argv[2] (default: the cell value) over the block
+   with corners argv[0] (default: upd_l) and argv[1] (default: argv[0]);
+   if those are not locations, accumulate the evaluated arguments instead. */
+static Token region_macro(RegFuncInit init, RegFuncUpdt updt, RegFuncFinl finl,
+			  FunctionIdentifier id, int argc, const Token argv[])
 {
-  if (argc == 2 && argv[0].type == LOCATION && argv[1].type == LOCATION)
-  {
-    int x1 = argv[0].u.location[X];
-    int x2 = argv[1].u.location[X];
-    posorder(&x1, &x2);
-    int y1 = argv[0].u.location[Y];
-    int y2 = argv[1].u.location[Y];
-    posorder(&y1, &y2);
-    int z1 = argv[0].u.location[Z];
-    int z2 = argv[1].u.location[Z];
-    posorder(&z1,&z2);
-
-    Location l; l[X] = x1; l[Y] = y1; l[Z] = z1;
-    Token t = recompvalue(upd_sheet, l);
-    if (init != (RegFuncInit)0) init(id, &l, &t);
-    if (t.type == EEK) return t;
-
-    Location w;
-    for (w[X]=x1; w[X]<=x2; ++(w[X]))
-    for (w[Y]=y1; w[Y]<=y2; ++(w[Y]))
-    for (w[Z]=z1; w[Z]<=z2; ++(w[Z]))
-    {
-      Token tmp = recompvalue(upd_sheet, w);
-      updt(id, &l, &t, &w, &tmp);
-      tfree_protected(&tmp, t);
-      if (t.type == EEK) {
-        const char *templ = _("While computing %s() at &(%d,%d,%d): %s");
-	Token report;
-	report.type = EEK;
-	report.u.err =
-	  malloc(strlen(templ) + MAX_FUNC_NAME_LENGTH + 3*20 + strlen(t.u.err));
-	sprintf(report.u.err, templ, tfunc[id].name, w[X], w[Y], w[Z], t.u.err);
-	tfree(&t);
-	return report;
-      }
+  Location l1; LOCATION_GETS(l1, upd_l); // both corners start out as upd_l
+  Location l2; LOCATION_GETS(l2, upd_l);
+  bool block = argc < 4; // could be a block with 0, 1, 2, or 3 args
+  if (block && argc > 0) { // no args: keep the default one-cell block at upd_l
+    Token first = evaltoken(argv[0], FULL);
+    if (first.type == LOCATION) {
+      LOCATION_GETS(l1, first.u.location);
+      LOCATION_GETS(l2, l1); // a single location means a one-cell block
+    } else {
+      block = false;
+    }
+    tfree_protected(&first, argv[0]);
+    if (block && argc > 1) {
+      Token second = evaltoken(argv[1], FULL);
+      if (second.type == LOCATION) {
+        LOCATION_GETS(l2, second.u.location);
+      } else {
+        block = false;
+      }
+      tfree_protected(&second, argv[1]);
     }
-    if (finl != (RegFuncFinl)0) finl(id, &l, &t);
-    return t;
   }
-  if (argc > 0) /* try to accumulate over all arguments */
-  {
+  if (!block) { // accumulate over all (evaluated) arguments; argc >= 1 here
     Location l; OLOCATION(l);
-    Token t = argv[0];
+    Token t = evaltoken(argv[0], FULL);
     if (init != (RegFuncInit)0) init(id, &l, &t);
     for (int i = 0; i < argc; ++i) {
       Location fake; OLOCATION(fake); fake[X] = i;
-      updt(id, &l, &t, &fake, argv + i);
+      Token u = evaltoken(argv[i], FULL);
+      updt(id, &l, &t, &fake, &u);
+      tfree_protected(&u, t);
       if (t.type == EEK) return t;
     }
     /* don't call finalize in this case because the region is fake */
     return t;
   }
-  const char* templ = _("Usage:%s(loc_start,loc_end)|%s(val2,val2,...)");
-  Token err;
-  err.type = EEK;
-  err.u.err = malloc(strlen(templ) + 2*MAX_FUNC_NAME_LENGTH + 1);
-  sprintf(err.u.err, templ, tfunc[id].name, tfunc[id].name);
-  return err;
+  // Evaluate over all cells in block defined by locations l1 and l2
+  int x1 = l1[X], x2 = l2[X]; posorder(&x1, &x2);
+  int y1 = l1[Y], y2 = l2[Y]; posorder(&y1, &y2);
+  int z1 = l1[Z], z2 = l2[Z]; posorder(&z1, &z2);
+  Location l; l[X] = x1; l[Y] = y1; l[Z] = z1;
+  Token t;
+  if (argc == 3) { // third argument is the expression to evaluate per cell
+    t = evaluate_at(argv[2], upd_sheet, l);
+  } else {
+    t = recompvalue(upd_sheet, l);
+  }
+  if (init != (RegFuncInit)0) init(id, &l, &t);
+  if (t.type == EEK) return t;
+
+  Location w;
+  for (w[X]=x1; w[X]<=x2; ++(w[X]))
+  for (w[Y]=y1; w[Y]<=y2; ++(w[Y]))
+  for (w[Z]=z1; w[Z]<=z2; ++(w[Z]))
+  {
+    Token tmp;
+    if (argc == 3) {
+      tmp = evaluate_at(argv[2], upd_sheet, w);
+    } else {
+      tmp = recompvalue(upd_sheet, w);
+    }
+    updt(id, &l, &t, &w, &tmp);
+    tfree_protected(&tmp, t);
+    if (t.type == EEK) { // wrap the error with the function name and location
+      const char *templ = _("While computing %s() at &(%d,%d,%d): %s");
+      Token report;
+      report.type = EEK;
+      report.u.err =
+        malloc(strlen(templ) + MAX_FUNC_NAME_LENGTH + 3*20 + strlen(t.u.err));
+      sprintf(report.u.err, templ, tfunc[id].name, w[X], w[Y], w[Z], t.u.err);
+      tfree(&t);
+      return report;
+    }
+  }
+  if (finl != (RegFuncFinl)0) finl(id, &l, &t);
+  return t;
 }

 static void sum_init(FunctionIdentifier id, Location *loc, Token *tok)
@ -1280,11 +1300,9 @@ static void minmax_finl(FunctionIdentifier id, Location *loc, Token *tok)
  LOCATION_GETS(tok->u.location, *loc);
 }

-static void n_init(FunctionIdentifier id, Location *loc, Token *tok)
+static void init_zero(FunctionIdentifier id, Location *loc, Token *tok)
 {
-  assert(id == FUNC_N);
   tfree(tok); // hand the incoming token to tfree before overwriting it
-  tok->type = EMPTY;
   tok->type = INT; // the accumulator starts out as the integer 0
   tok->u.integer = 0;
 }
@ -1296,6 +1314,20 @@ static void n_updt(FunctionIdentifier id, Location *loc, Token *tok,
  tok->u.integer += (newtok->type != EMPTY);
 }

+static void count_updt(FunctionIdentifier id, Location *loc, Token *tok,
+                   const Location* newloc, const Token *newtok)
+{
+  assert(id == FUNC_COUNT);
+  Token countit = tbool(*newtok); // boolean conversion of the new value
+  if (countit.type == EEK) { // conversion failed: the error replaces the tally
+    tfree_protected(tok, countit);
+    *tok = countit;
+    return;
+  }
+  assert(countit.type == BOOL);
+  tok->u.integer += countit.u.bl; // add the boolean's u.bl to the running count
+}
+
 static Token reg_disp(FunctionIdentifier self, int argc, const Token argv[])
 {
  RegFuncInit i = 0;
@ -1308,10 +1340,12 @@ static Token reg_disp(FunctionIdentifier self, int argc, const Token argv[])
    case FUNC_MAX:
      u = minmax_updt; f = minmax_finl; break;
    case FUNC_N:
-      i = n_init; u = n_updt; break;
+      i = init_zero; u = n_updt; break;
+    case FUNC_COUNT:
+      i = init_zero; u = count_updt; break;
    default: assert(0);
    }
-  return region_func(i, u, f, self, argc, argv);
+  return region_macro(i, u, f, self, argc, argv);
 }

 /* binop_func -- common implementation of all binary operations
@ -1702,8 +1736,8 @@ static Token negate_func(FunctionIdentifier self, int argc, const Token argv[])
 }

 /* table of functions */ /*{{{*/
-/* The order of these entries has no influence on performance, but to stay
-   compatible, new entries should be appended. */
+/* The order of these entries is irrelevant because each one is placed by
+   its [FUNC_XXX] designated initializer. */
 Tfunc tfunc[]=
 {
 /* Operators in order of increasing precedence */
@ -1782,10 +1816,11 @@ Tfunc tfunc[]=
 [FUNC_CENTER]      = { "center",      self_func,       PREFIX_FUNC, FUNCT, 0 },

 /* Block operations */
- [FUNC_MAX] = { "max", reg_disp, PREFIX_FUNC, FUNCT, 0 },
- [FUNC_MIN] = { "min", reg_disp, PREFIX_FUNC, FUNCT, 0 },
- [FUNC_N]   = { "n",   reg_disp, PREFIX_FUNC, FUNCT, 0 },
- [FUNC_SUM] = { "sum", reg_disp, PREFIX_FUNC, FUNCT, 0 },
+ [FUNC_COUNT] = { "count", reg_disp, PREFIX_FUNC, MACRO, 0 },
+ [FUNC_MAX]   = { "max",   reg_disp, PREFIX_FUNC, MACRO, 0 },
+ [FUNC_MIN]   = { "min",   reg_disp, PREFIX_FUNC, MACRO, 0 },
+ [FUNC_N]     = { "n",     reg_disp, PREFIX_FUNC, MACRO, 0 },
+ [FUNC_SUM]   = { "sum",   reg_disp, PREFIX_FUNC, MACRO, 0 },

 /* String functions */
 [FUNC_LEN]    = { "len",    len_func,    PREFIX_FUNC, FUNCT, 0 },
--- a/src/common/func.h
+++ b/src/common/func.h
@ -51,6 +51,8 @@ typedef enum

 FUNC_DIM, FUNC_ITALIC,

+ FUNC_COUNT,
+
 N_FUNCTION_IDS
 } FunctionIdentifier;