From a5a008a8a5f96ef1fe4afdbbb8c9c51f3869b1ef Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Sat, 29 Apr 2023 19:16:44 -0400 Subject: [PATCH] feat: Allow expressions in region functions and add count() accumulator --- doc/teapot.lyx | 716 +++++++++++++++++++++------------------------- src/common/func.c | 157 ++++++---- src/common/func.h | 2 + 3 files changed, 430 insertions(+), 445 deletions(-) diff --git a/doc/teapot.lyx b/doc/teapot.lyx index 6ab5ff7..176babb 100644 --- a/doc/teapot.lyx +++ b/doc/teapot.lyx @@ -6169,9 +6169,8 @@ Functions \end_layout \begin_layout Standard -This section documents all available functions in alphabetical order (except - for the short addressing/value fetching functions, which are listed together - at the beginning). +This section documents all available functions. + Functions are listed within each section in alphabetical order. The functions are described in a C-like notation; you don't have to write the types when you use the function in a formula. For example, use @@ -6186,7 +6185,38 @@ This section documents all available functions in alphabetical order (except If no type is given for the result of a function, it means the result type depends on the arguments. Brackets mark optional arguments. - +\end_layout + +\begin_layout Subsubsection +Addressing/value fetching functions +\end_layout + +\begin_layout Standard +All of these functions come in pairs, an +\begin_inset Quotes eld +\end_inset + +addressing +\begin_inset Quotes erd +\end_inset + + one whose value is a +\emph on +location +\emph default +, and a corresponding +\begin_inset Quotes eld +\end_inset + +fetching +\begin_inset Quotes erd +\end_inset + + one that produces the +\emph on +value +\emph default + of the cell at the corresponding location. \end_layout \begin_layout Description @@ -6665,15 +6695,310 @@ X& \series default () \family default - as well, which takes exactly the same arguments with the same meanings, - but it is rarely needed. 
- It is provided for completeness. + as well, which takes exactly the same arguments with the same meanings + and returns the corresponding location, rather than the value there, but + it is rarely needed. + It is provided for completeness of the pairing described in the introduction + to this section. \begin_inset Newline newline \end_inset See the FAQ below for further discussion of cell references. \end_layout +\begin_layout Subsubsection +Accumulation functions +\end_layout + +\begin_layout Standard +All of these functions accumulate an entire array of values in some way. + They differ only in how the values are accumulated, and otherwise have + exactly the same behavior/signature, namely: +\end_layout + +\begin_layout Description + +\series bold +\emph on +accumulator +\family sans +\series default +\emph default +( +\family default +\series medium +[location +\emph on + +\begin_inset space ~ +\end_inset + +l1 +\emph default +[, +\begin_inset space ~ +\end_inset + +location +\emph on + +\begin_inset space ~ +\end_inset + +l2 +\emph default +[, +\emph on + +\begin_inset space ~ +\end_inset + +expr +\emph default +]]]) +\emph on + +\begin_inset space ~ +\end_inset + + +\emph default +| +\emph on + +\begin_inset space ~ +\end_inset + + +\series bold +accumulator +\series medium +\emph default +( +\emph on +v1 +\family roman +, +\family default + +\begin_inset space ~ +\end_inset + +v2, +\begin_inset space ~ +\end_inset + +... +\emph default +) +\series default +The first form evaluates to the accumulation of +\emph on +expr +\emph default + computed for each location in the block defined by corner locations +\emph on +l1 +\emph default + and +\emph on +l2 +\emph default +. + (Note that negative coordinates in the block corners are simply interpreted + as zero.) 
The expression +\emph on +expr +\emph default + defaults to +\begin_inset Quotes eld +\end_inset + + +\series bold +@ +\series default + +\begin_inset Quotes erd +\end_inset + + – in other words, it accumulates the values in that block. + The location +\emph on +l2 +\emph default + defaults to +\emph on +l1 +\emph default +, corresponding to a one-cell block, and +\emph on +l1 +\emph default + in turn defaults to the current location. + The second form simply accumulates all of the argument values. +\end_layout + +\begin_layout Standard +The available accumulators are: +\end_layout + +\begin_layout Description + +\series medium +int +\begin_inset space ~ +\end_inset + + +\series default +count Returns the number of accumulated values that are true when converted + to a boolean value. + For a block defined by corner locations +\emph on +l1 +\emph default + and +\emph on +l2 +\emph default +, +\begin_inset Quotes eld +\end_inset + +count( +\emph on +l1,l2 +\emph default +) +\begin_inset Quotes erd +\end_inset + + is essentially a shorthand for +\begin_inset Quotes eld +\end_inset + +sum( +\emph on +l1, l2, +\emph default + int(bool())) +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Description +max In the first form, returns the +\emph on +location +\emph default + within the block of the maximum value being accumulated; to get the maximum + value occurring in the block with corners +\emph on +l1 +\emph default + and +\emph on +l2 +\emph default +, use +\begin_inset Quotes eld +\end_inset + +@(max( +\emph on +l1 +\emph default +, +\emph on +l2 +\emph default +)) +\begin_inset Quotes erd +\end_inset + + instead. + In the second form, simply returns the maximum of the argument values. + Recall that in comparisons with the corresponding types, an empty cell + corresponds to 0, 0.0, or +\begin_inset Quotes eld +\end_inset + + +\begin_inset Quotes erd +\end_inset + +, as needed. 
+ This accumulator returns an error if it encounters values that are not + comparable (like a string and an integer). +\end_layout + +\begin_layout Description +min Just like +\family sans +max +\family default + but for the minimum of the accumulated values. +\end_layout + +\begin_layout Description + +\series medium +int +\begin_inset space ~ +\end_inset + + +\series default +n Returns the number of accumulated values that are not empty. + For a block defined by corner locations +\emph on +l1 +\emph default + and +\emph on +l2 +\emph default +, +\begin_inset Quotes eld +\end_inset + +n( +\emph on +l1,l2 +\emph default +) +\begin_inset Quotes erd +\end_inset + + is essentially a shorthand for +\begin_inset Quotes eld +\end_inset + +sum( +\emph on +l1, l2, +\emph default + int(-is(@,empty))) +\begin_inset Quotes erd +\end_inset + +. +\end_layout + +\begin_layout Description +sum Returns the sum of the accumulated values (recall for strings this is + the concatenation). + This accumulator returns an error if it encounters values that cannot be added + (like a string and an integer). +\end_layout + +\begin_layout Subsubsection +All other functions +\end_layout + \begin_layout Description \series medium @@ -8177,302 +8502,6 @@ is to convert three integers to a location. \end_layout -\begin_layout Description - -\series medium -location -\begin_inset space ~ -\end_inset - - -\series default -max -\series medium -(location -\emph on - -\begin_inset space ~ -\end_inset - -l1 -\emph default -, -\begin_inset space ~ -\end_inset - -location -\emph on - -\begin_inset space ~ -\end_inset - -l2 -\emph default -) -\emph on - -\begin_inset space ~ -\end_inset - - -\emph default -| -\emph on - -\begin_inset space ~ -\end_inset - - -\series default -\emph default -max -\series medium -( -\emph on -v1 -\emph default -, -\begin_inset space ~ -\end_inset - - -\emph on -v2 -\emph default -, -\emph on - -\begin_inset space ~ -\end_inset - - -\emph default -...)
-\series default - evaluates to the maximum in the same way min does for the minimum. - -\end_layout - -\begin_layout Description - -\series medium -location -\begin_inset space ~ -\end_inset - - -\series default -min -\series medium -(location -\emph on - -\begin_inset space ~ -\end_inset - -l1 -\emph default -, -\begin_inset space ~ -\end_inset - -location -\emph on - -\begin_inset space ~ -\end_inset - -l2 -\emph default -) -\emph on - -\begin_inset space ~ -\end_inset - - -\emph default -| -\emph on - -\begin_inset space ~ -\end_inset - - -\series default -\emph default -min -\series medium -( -\emph on -v1 -\emph default -, -\begin_inset space ~ -\end_inset - - -\emph on -v2 -\emph default -, -\emph on - -\begin_inset space ~ -\end_inset - - -\emph default -...) -\series default - The first form evaluates to the location of the minimum of all values in - the block marked by the corners pointed to by -\emph on -l1 -\emph default - and -\emph on -l2 -\emph default -. - Note that the empty cell is equal to 0, 0.0 and "", so if the first minimum - is an empty cell, the result will be a pointer to this cell, too. - If you are not interested in the location of the minimum but the value - itself, use @(min( -\emph on -l1 -\emph default -, -\emph on -l2 -\emph default -)). - The second form simply returns the smallest of the specified values, returning - an error if it encounters two that are not comparable (like a string and - an integer). 
-\end_layout - -\begin_layout Description - -\series medium -int -\begin_inset space ~ -\end_inset - - -\series default -n -\series medium -([location -\emph on - -\begin_inset space ~ -\end_inset - -l1 -\emph default -[, -\begin_inset space ~ -\end_inset - -location -\emph on - -\begin_inset space ~ -\end_inset - -l2 -\emph default -]) -\emph on - -\begin_inset space ~ -\end_inset - - -\emph default -| -\emph on - -\begin_inset space ~ -\end_inset - - -\series default -\emph default -n -\series medium -( -\family roman -\emph on -v1, -\family default - -\begin_inset space ~ -\end_inset - -v2, -\begin_inset space ~ -\end_inset - -... -\emph default -) -\series default - The first form evaluates to the number of non-empty cells in the block - with corners at location -\emph on -s l1 -\emph default - and -\emph on -l2 -\emph default -. - Location -\emph on -l2 -\emph default - defaults to -\emph on -l1 -\emph default -; i.e., with a single location argument -\family sans -\series bold -n -\family default -\series medium -( -\family sans -\series default -\emph on -l1 -\family default -\series medium -\emph default -) -\series default - just tests whether the cell at -\emph on -l1 -\emph default - is empty. - Location -\emph on -l1 -\emph default - defaults to the current location. - If any dimension of either location is negative, that component is simply - taken as 0. - In other words, locations off the sheet are considered empty, but it is - not an error to access them. -\begin_inset Newline newline -\end_inset - -The second form simply returns the number of its arguments which are nonempty. -\end_layout - \begin_layout Description number \series medium @@ -9117,87 +9146,6 @@ y If is omitted, the substring proceeds to the end of the string. 
\end_layout -\begin_layout Description -sum -\series medium -(location -\emph on - -\begin_inset space ~ -\end_inset - -l1 -\emph default -, -\begin_inset space ~ -\end_inset - -location -\emph on - -\begin_inset space ~ -\end_inset - -l2 -\emph default -) -\emph on - -\begin_inset space ~ -\end_inset - - -\emph default -| -\emph on - -\begin_inset space ~ -\end_inset - - -\series default -\emph default -sum -\series medium -( -\family roman -\emph on -v1, -\family default - -\begin_inset space ~ -\end_inset - - -\family roman -v2, -\family default - -\begin_inset space ~ -\end_inset - - -\family roman -... -\emph default -) -\family default - -\series default -The first form evaluates to the sum of all values in the block with corners - at locations -\emph on -l1 -\emph default - and -\emph on -l2 -\emph default -. - The second form simply adds all of its arguments. - -\end_layout - \begin_layout Description \series medium diff --git a/src/common/func.c b/src/common/func.c index 37f987b..ff17f26 100644 --- a/src/common/func.c +++ b/src/common/func.c @@ -1175,68 +1175,88 @@ typedef void (*RegFuncUpdt)(FunctionIdentifier id, Location *loc, Token *tok, const Location *newloc, const Token* newtok); typedef void (*RegFuncFinl)(FunctionIdentifier id, Location *loc, Token *tok); -/* region_func -- apply an operation over a whole region */ -static Token region_func(RegFuncInit init, RegFuncUpdt updt, RegFuncFinl finl, - FunctionIdentifier id, int argc, const Token argv[]) +/* region_macro -- apply an operation over an expression evaluated at + every location in a whole region, or every value in the region, or the + evaluated value of every argument to the macro. 
*/ +static Token region_macro(RegFuncInit init, RegFuncUpdt updt, RegFuncFinl finl, + FunctionIdentifier id, int argc, const Token argv[]) { - if (argc == 2 && argv[0].type == LOCATION && argv[1].type == LOCATION) - { - int x1 = argv[0].u.location[X]; - int x2 = argv[1].u.location[X]; - posorder(&x1, &x2); - int y1 = argv[0].u.location[Y]; - int y2 = argv[1].u.location[Y]; - posorder(&y1, &y2); - int z1 = argv[0].u.location[Z]; - int z2 = argv[1].u.location[Z]; - posorder(&z1,&z2); - - Location l; l[X] = x1; l[Y] = y1; l[Z] = z1; - Token t = recompvalue(upd_sheet, l); - if (init != (RegFuncInit)0) init(id, &l, &t); - if (t.type == EEK) return t; - - Location w; - for (w[X]=x1; w[X]<=x2; ++(w[X])) - for (w[Y]=y1; w[Y]<=y2; ++(w[Y])) - for (w[Z]=z1; w[Z]<=z2; ++(w[Z])) - { - Token tmp = recompvalue(upd_sheet, w); - updt(id, &l, &t, &w, &tmp); - tfree_protected(&tmp, t); - if (t.type == EEK) { - const char *templ = _("While computing %s() at &(%d,%d,%d): %s"); - Token report; - report.type = EEK; - report.u.err = - malloc(strlen(templ) + MAX_FUNC_NAME_LENGTH + 3*20 + strlen(t.u.err)); - sprintf(report.u.err, templ, tfunc[id].name, w[X], w[Y], w[Z], t.u.err); - tfree(&t); - return report; - } + Location l1; LOCATION_GETS(l1, upd_l); + Location l2; LOCATION_GETS(l2, upd_l); + bool block = argc < 4; // could be a block with 0, 1, 2, or 3 args; with 0 args l1 and l2 stay at the current location + if (block && argc > 0) { + Token first = evaltoken(argv[0], FULL); + if (first.type == LOCATION) { + LOCATION_GETS(l1, first.u.location); + LOCATION_GETS(l2, l1); + } else { + block = false; + } + tfree_protected(&first, argv[0]); + if (block && argc > 1) { + Token second = evaltoken(argv[1], FULL); + if (second.type == LOCATION) { + LOCATION_GETS(l2, second.u.location); + } else { + block = false; + } + tfree_protected(&second, argv[1]); } - if (finl != (RegFuncFinl)0) finl(id, &l, &t); - return t; } - if (argc > 0) /* try to accumulate over all arguments */ - { + if (!block) { // accumulate over all (evaluated) arguments Location
l; OLOCATION(l); - Token t = argv[0]; + Token t = evaltoken(argv[0], FULL); if (init != (RegFuncInit)0) init(id, &l, &t); for (int i = 0; i < argc; ++i) { Location fake; OLOCATION(fake); fake[X] = i; - updt(id, &l, &t, &fake, argv + i); + Token u = evaltoken(argv[i], FULL); + updt(id, &l, &t, &fake, &u); + tfree_protected(&u, t); if (t.type == EEK) return t; } /* don't call finalize in this case because the region is fake */ return t; } - const char* templ = _("Usage:%s(loc_start,loc_end)|%s(val2,val2,...)"); - Token err; - err.type = EEK; - err.u.err = malloc(strlen(templ) + 2*MAX_FUNC_NAME_LENGTH + 1); - sprintf(err.u.err, templ, tfunc[id].name, tfunc[id].name); - return err; + // Evaluate over all cells in block defined by locations l1 and l2 + int x1 = l1[X], x2 = l2[X]; posorder(&x1, &x2); + int y1 = l1[Y], y2 = l2[Y]; posorder(&y1, &y2); + int z1 = l1[Z], z2 = l2[Z]; posorder(&z1, &z2); + Location l; l[X] = x1; l[Y] = y1; l[Z] = z1; + Token t; + if (argc == 3) { + t = evaluate_at(argv[2], upd_sheet, l); + } else { + t = recompvalue(upd_sheet, l); + } + if (init != (RegFuncInit)0) init(id, &l, &t); + if (t.type == EEK) return t; + + Location w; + for (w[X]=x1; w[X]<=x2; ++(w[X])) + for (w[Y]=y1; w[Y]<=y2; ++(w[Y])) + for (w[Z]=z1; w[Z]<=z2; ++(w[Z])) + { + Token tmp; + if (argc == 3) { + tmp = evaluate_at(argv[2], upd_sheet, w); + } else { + tmp = recompvalue(upd_sheet, w); + } + updt(id, &l, &t, &w, &tmp); + tfree_protected(&tmp, t); + if (t.type == EEK) { + const char *templ = _("While computing %s() at &(%d,%d,%d): %s"); + Token report; + report.type = EEK; + report.u.err = + malloc(strlen(templ) + MAX_FUNC_NAME_LENGTH + 3*20 + strlen(t.u.err)); + sprintf(report.u.err, templ, tfunc[id].name, w[X], w[Y], w[Z], t.u.err); + tfree(&t); + return report; + } + } + if (finl != (RegFuncFinl)0) finl(id, &l, &t); + return t; } static void sum_init(FunctionIdentifier id, Location *loc, Token *tok) @@ -1280,11 +1300,9 @@ static void minmax_finl(FunctionIdentifier id,
Location *loc, Token *tok) LOCATION_GETS(tok->u.location, *loc); } -static void n_init(FunctionIdentifier id, Location *loc, Token *tok) +static void init_zero(FunctionIdentifier id, Location *loc, Token *tok) { - assert(id == FUNC_N); tfree(tok); - tok->type = EMPTY; tok->type = INT; tok->u.integer = 0; } @@ -1296,6 +1314,20 @@ static void n_updt(FunctionIdentifier id, Location *loc, Token *tok, tok->u.integer += (newtok->type != EMPTY); } +static void count_updt(FunctionIdentifier id, Location *loc, Token *tok, + const Location* newloc, const Token *newtok) +{ + assert(id == FUNC_COUNT); + Token countit = tbool(*newtok); + if (countit.type == EEK) { + tfree_protected(tok, countit); + *tok = countit; + return; + } + assert(countit.type == BOOL); + tok->u.integer += countit.u.bl; +} + static Token reg_disp(FunctionIdentifier self, int argc, const Token argv[]) { RegFuncInit i = 0; @@ -1308,10 +1340,12 @@ static Token reg_disp(FunctionIdentifier self, int argc, const Token argv[]) case FUNC_MAX: u = minmax_updt; f = minmax_finl; break; case FUNC_N: - i = n_init; u = n_updt; break; + i = init_zero; u = n_updt; break; + case FUNC_COUNT: + i = init_zero; u = count_updt; break; default: assert(0); } - return region_func(i, u, f, self, argc, argv); + return region_macro(i, u, f, self, argc, argv); } /* binop_func -- common implementation of all binary operations @@ -1702,8 +1736,8 @@ static Token negate_func(FunctionIdentifier self, int argc, const Token argv[]) } /* table of functions */ /*{{{*/ -/* The order of these entries has no influence on performance, but to stay - compatible, new entries should be appended. */ +/* The order of these entries is irrelevant because each entry is placed at + the index given by its [FUNC_XXX] designated initializer.
*/ Tfunc tfunc[]= { /* Operators in order of increasing precedence */ @@ -1782,10 +1816,11 @@ Tfunc tfunc[]= [FUNC_CENTER] = { "center", self_func, PREFIX_FUNC, FUNCT, 0 }, /* Block operations */ - [FUNC_MAX] = { "max", reg_disp, PREFIX_FUNC, FUNCT, 0 }, - [FUNC_MIN] = { "min", reg_disp, PREFIX_FUNC, FUNCT, 0 }, - [FUNC_N] = { "n", reg_disp, PREFIX_FUNC, FUNCT, 0 }, - [FUNC_SUM] = { "sum", reg_disp, PREFIX_FUNC, FUNCT, 0 }, + [FUNC_COUNT] = { "count", reg_disp, PREFIX_FUNC, MACRO, 0 }, + [FUNC_MAX] = { "max", reg_disp, PREFIX_FUNC, MACRO, 0 }, + [FUNC_MIN] = { "min", reg_disp, PREFIX_FUNC, MACRO, 0 }, + [FUNC_N] = { "n", reg_disp, PREFIX_FUNC, MACRO, 0 }, + [FUNC_SUM] = { "sum", reg_disp, PREFIX_FUNC, MACRO, 0 }, /* String functions */ [FUNC_LEN] = { "len", len_func, PREFIX_FUNC, FUNCT, 0 }, diff --git a/src/common/func.h b/src/common/func.h index 62b4574..3de1879 100644 --- a/src/common/func.h +++ b/src/common/func.h @@ -51,6 +51,8 @@ typedef enum FUNC_DIM, FUNC_ITALIC, + FUNC_COUNT, + N_FUNCTION_IDS } FunctionIdentifier;