feat: Allow expressions in region functions and add count() accumulator

This commit is contained in:
Glen Whitney 2023-04-29 19:16:44 -04:00
parent eb5d576349
commit a5a008a8a5
3 changed files with 430 additions and 445 deletions

View File

@ -6169,9 +6169,8 @@ Functions
\end_layout
\begin_layout Standard
This section documents all available functions in alphabetical order (except
for the short addressing/value fetching functions, which are listed together
at the beginning).
This section documents all available functions.
Functions are listed within each section in alphabetical order.
The functions are described in a C-like notation; you don't have to write
the types when you use the function in a formula.
For example, use
@ -6186,7 +6185,38 @@ This section documents all available functions in alphabetical order (except
If no type is given for the result of a function, it means the result type
depends on the arguments.
Brackets mark optional arguments.
\end_layout
\begin_layout Subsubsection
Addressing/value fetching functions
\end_layout
\begin_layout Standard
All of these functions come in pairs, an
\begin_inset Quotes eld
\end_inset
addressing
\begin_inset Quotes erd
\end_inset
one whose value is a
\emph on
location
\emph default
, and a corresponding
\begin_inset Quotes eld
\end_inset
fetching
\begin_inset Quotes erd
\end_inset
one that produces the
\emph on
value
\emph default
of the cell at the corresponding location.
\end_layout
\begin_layout Description
@ -6665,15 +6695,310 @@ X&
\series default
()
\family default
as well, which takes exactly the same arguments with the same meanings,
but it is rarely needed.
It is provided for completeness.
as well, which takes exactly the same arguments with the same meanings
and returns the corresponding location, rather than the value there, but
it is rarely needed.
It is provided for completeness of the pairing described in the introduction
to this section.
\begin_inset Newline newline
\end_inset
See the FAQ below for further discussion of cell references.
\end_layout
\begin_layout Subsubsection
Accumulation functions
\end_layout
\begin_layout Standard
All of these functions accumulate an entire array of values in some way.
They differ only in how the values are accumulated, and otherwise have
exactly the same behavior/signature, namely:
\end_layout
\begin_layout Description
\series bold
\emph on
accumulator
\family sans
\series default
\emph default
(
\family default
\series medium
[location
\emph on
\begin_inset space ~
\end_inset
l1
\emph default
[,
\begin_inset space ~
\end_inset
location
\emph on
\begin_inset space ~
\end_inset
l2
\emph default
[,
\emph on
\begin_inset space ~
\end_inset
expr
\emph default
]]])
\emph on
\begin_inset space ~
\end_inset
\emph default
|
\emph on
\begin_inset space ~
\end_inset
\series bold
accumulator
\series medium
\emph default
(
\emph on
v1
\family roman
,
\family default
\begin_inset space ~
\end_inset
v2,
\begin_inset space ~
\end_inset
...
\emph default
)
\series default
The first form evaluates to the accumulation of
\emph on
expr
\emph default
computed for each location in the block defined by corner locations
\emph on
l1
\emph default
and
\emph on
l2
\emph default
.
(Note that negative coordinates in the block corners are simply interpreted
as zero.) The expression
\emph on
expr
\emph default
defaults to
\begin_inset Quotes eld
\end_inset
\series bold
@
\series default
\begin_inset Quotes erd
\end_inset
; in other words, it accumulates the values in that block.
The location
\emph on
l2
\emph default
defaults to
\emph on
l1
\emph default
, corresponding to a one-cell block, and
\emph on
l1
\emph default
in turn defaults to the current location.
The second form simply accumulates all of the argument values.
\end_layout
\begin_layout Standard
The available accumulators are:
\end_layout
\begin_layout Description
\series medium
int
\begin_inset space ~
\end_inset
\series default
count Returns the number of accumulated values that are true when converted
to a boolean value.
For a block defined by corner locations
\emph on
l1
\emph default
and
\emph on
l2
\emph default
,
\begin_inset Quotes eld
\end_inset
count(
\emph on
l1,l2
\emph default
)
\begin_inset Quotes erd
\end_inset
is essentially a shorthand for
\begin_inset Quotes eld
\end_inset
sum(
\emph on
l1, l2,
\emph default
int(bool(@)))
\begin_inset Quotes erd
\end_inset
.
\end_layout
\begin_layout Description
max In the first form, returns the
\emph on
location
\emph default
within the block of the maximum value being accumulated; to get the maximum
value occurring in the block with corners
\emph on
l1
\emph default
and
\emph on
l2
\emph default
, use
\begin_inset Quotes eld
\end_inset
@(max(
\emph on
l1
\emph default
,
\emph on
l2
\emph default
))
\begin_inset Quotes erd
\end_inset
instead.
In the second form, simply returns the maximum of the argument values.
Recall that in comparisons with the corresponding types, an empty cell
corresponds to 0, 0.0, or
\begin_inset Quotes eld
\end_inset
\begin_inset Quotes erd
\end_inset
, as needed.
This accumulator returns an error if it encounters values that are not
comparable (like a string and an integer).
\end_layout
\begin_layout Description
min Just like
\family sans
max
\family default
but for the minimum of the accumulated values.
\end_layout
\begin_layout Description
\series medium
int
\begin_inset space ~
\end_inset
\series default
n Returns the number of accumulated values that are not empty.
For a block defined by corner locations
\emph on
l1
\emph default
and
\emph on
l2
\emph default
,
\begin_inset Quotes eld
\end_inset
n(
\emph on
l1,l2
\emph default
)
\begin_inset Quotes erd
\end_inset
is essentially a shorthand for
\begin_inset Quotes eld
\end_inset
sum(
\emph on
l1, l2,
\emph default
int(-is(@,empty)))
\begin_inset Quotes erd
\end_inset
.
\end_layout
\begin_layout Description
sum Returns the sum of the accumulated values (recall for strings this is
the concatenation).
This accumulator returns an error if it encounters values that cannot be added
(like a string and an integer).
\end_layout
\begin_layout Subsubsection
All other functions
\end_layout
\begin_layout Description
\series medium
@ -8177,302 +8502,6 @@ is
to convert three integers to a location.
\end_layout
\begin_layout Description
\series medium
location
\begin_inset space ~
\end_inset
\series default
max
\series medium
(location
\emph on
\begin_inset space ~
\end_inset
l1
\emph default
,
\begin_inset space ~
\end_inset
location
\emph on
\begin_inset space ~
\end_inset
l2
\emph default
)
\emph on
\begin_inset space ~
\end_inset
\emph default
|
\emph on
\begin_inset space ~
\end_inset
\series default
\emph default
max
\series medium
(
\emph on
v1
\emph default
,
\begin_inset space ~
\end_inset
\emph on
v2
\emph default
,
\emph on
\begin_inset space ~
\end_inset
\emph default
...)
\series default
evaluates to the maximum in the same way min does for the minimum.
\end_layout
\begin_layout Description
\series medium
location
\begin_inset space ~
\end_inset
\series default
min
\series medium
(location
\emph on
\begin_inset space ~
\end_inset
l1
\emph default
,
\begin_inset space ~
\end_inset
location
\emph on
\begin_inset space ~
\end_inset
l2
\emph default
)
\emph on
\begin_inset space ~
\end_inset
\emph default
|
\emph on
\begin_inset space ~
\end_inset
\series default
\emph default
min
\series medium
(
\emph on
v1
\emph default
,
\begin_inset space ~
\end_inset
\emph on
v2
\emph default
,
\emph on
\begin_inset space ~
\end_inset
\emph default
...)
\series default
The first form evaluates to the location of the minimum of all values in
the block marked by the corners pointed to by
\emph on
l1
\emph default
and
\emph on
l2
\emph default
.
Note that the empty cell is equal to 0, 0.0 and "", so if the first minimum
is an empty cell, the result will be a pointer to this cell, too.
If you are not interested in the location of the minimum but the value
itself, use @(min(
\emph on
l1
\emph default
,
\emph on
l2
\emph default
)).
The second form simply returns the smallest of the specified values, returning
an error if it encounters two that are not comparable (like a string and
an integer).
\end_layout
\begin_layout Description
\series medium
int
\begin_inset space ~
\end_inset
\series default
n
\series medium
([location
\emph on
\begin_inset space ~
\end_inset
l1
\emph default
[,
\begin_inset space ~
\end_inset
location
\emph on
\begin_inset space ~
\end_inset
l2
\emph default
])
\emph on
\begin_inset space ~
\end_inset
\emph default
|
\emph on
\begin_inset space ~
\end_inset
\series default
\emph default
n
\series medium
(
\family roman
\emph on
v1,
\family default
\begin_inset space ~
\end_inset
v2,
\begin_inset space ~
\end_inset
...
\emph default
)
\series default
The first form evaluates to the number of non-empty cells in the block
with corners at location
\emph on
s l1
\emph default
and
\emph on
l2
\emph default
.
Location
\emph on
l2
\emph default
defaults to
\emph on
l1
\emph default
; i.e., with a single location argument
\family sans
\series bold
n
\family default
\series medium
(
\family sans
\series default
\emph on
l1
\family default
\series medium
\emph default
)
\series default
just tests whether the cell at
\emph on
l1
\emph default
is empty.
Location
\emph on
l1
\emph default
defaults to the current location.
If any dimension of either location is negative, that component is simply
taken as 0.
In other words, locations off the sheet are considered empty, but it is
not an error to access them.
\begin_inset Newline newline
\end_inset
The second form simply returns the number of its arguments which are nonempty.
\end_layout
\begin_layout Description
number
\series medium
@ -9117,87 +9146,6 @@ y
If is omitted, the substring proceeds to the end of the string.
\end_layout
\begin_layout Description
sum
\series medium
(location
\emph on
\begin_inset space ~
\end_inset
l1
\emph default
,
\begin_inset space ~
\end_inset
location
\emph on
\begin_inset space ~
\end_inset
l2
\emph default
)
\emph on
\begin_inset space ~
\end_inset
\emph default
|
\emph on
\begin_inset space ~
\end_inset
\series default
\emph default
sum
\series medium
(
\family roman
\emph on
v1,
\family default
\begin_inset space ~
\end_inset
\family roman
v2,
\family default
\begin_inset space ~
\end_inset
\family roman
...
\emph default
)
\family default
\series default
The first form evaluates to the sum of all values in the block with corners
at locations
\emph on
l1
\emph default
and
\emph on
l2
\emph default
.
The second form simply adds all of its arguments.
\end_layout
\begin_layout Description
\series medium

View File

@ -1175,68 +1175,88 @@ typedef void (*RegFuncUpdt)(FunctionIdentifier id, Location *loc, Token *tok,
const Location *newloc, const Token* newtok);
typedef void (*RegFuncFinl)(FunctionIdentifier id, Location *loc, Token *tok);
/* region_macro -- accumulate an operation over a block of cells or over a
   list of values.

   Every argument in argv is passed through evaltoken() here before use.
   Two call forms are recognised:

   Block form: at most three arguments, where the first and (if present)
   the second evaluate to a LOCATION.  These are the corners l1 and l2 of
   the block; l2 defaults to l1 and l1 defaults to upd_l.  If a third
   argument is given it is evaluated with evaluate_at() at every location
   of the block and those results are accumulated; otherwise the value
   obtained from recompvalue() at each location is accumulated.

   Value form: any other argument list.  Each argument is evaluated and
   the results are accumulated.

   init (may be null) prepares the accumulator, updt folds one value into
   it, and finl (may be null, block form only) post-processes the result.
   Returns the accumulator token; when updt leaves an EEK token in the
   accumulator during the block form, a new EEK token naming the function
   and the offending location is returned instead. */
static Token region_macro(RegFuncInit init, RegFuncUpdt updt, RegFuncFinl finl,
                          FunctionIdentifier id, int argc, const Token argv[])
{
  /* Both corners start out at upd_l so that a call without arguments
     denotes the one-cell block at that location. */
  Location l1; LOCATION_GETS(l1, upd_l);
  Location l2; LOCATION_GETS(l2, upd_l);

  /* Zero to three arguments could describe a block.  argc == 0 must stay
     on the block path: the value path below reads argv[0]. */
  bool block = argc < 4;
  if (block && argc > 0) {
    Token first = evaltoken(argv[0], FULL);
    if (first.type == LOCATION) {
      LOCATION_GETS(l1, first.u.location);
      LOCATION_GETS(l2, l1);   /* a lone corner means a one-cell block */
    } else {
      block = false;
    }
    tfree_protected(&first, argv[0]);
    if (block && argc > 1) {
      Token second = evaltoken(argv[1], FULL);
      if (second.type == LOCATION) {
        LOCATION_GETS(l2, second.u.location);
      } else {
        block = false;
      }
      tfree_protected(&second, argv[1]);
    }
  }

  if (!block) { // accumulate over all (evaluated) arguments
    Location l; OLOCATION(l);
    Token t = evaltoken(argv[0], FULL);
    if (init != (RegFuncInit)0) init(id, &l, &t);
    for (int i = 0; i < argc; ++i) {
      /* updt wants a location for each value; use the argument index */
      Location fake; OLOCATION(fake); fake[X] = i;
      Token u = evaltoken(argv[i], FULL);
      updt(id, &l, &t, &fake, &u);
      tfree_protected(&u, t);
      if (t.type == EEK) return t;
    }
    /* don't call finalize in this case because the region is fake */
    return t;
  }

  // Evaluate over all cells in block defined by locations l1 and l2
  /* NOTE(review): posorder() presumably orders each pair and clamps
     negative coordinates to zero, as the manual describes -- confirm. */
  int x1 = l1[X], x2 = l2[X]; posorder(&x1, &x2);
  int y1 = l1[Y], y2 = l2[Y]; posorder(&y1, &y2);
  int z1 = l1[Z], z2 = l2[Z]; posorder(&z1, &z2);

  /* Seed the accumulator from the first corner of the ordered block. */
  Location l; l[X] = x1; l[Y] = y1; l[Z] = z1;
  Token t;
  if (argc == 3) {
    t = evaluate_at(argv[2], upd_sheet, l);
  } else {
    t = recompvalue(upd_sheet, l);
  }
  if (init != (RegFuncInit)0) init(id, &l, &t);
  if (t.type == EEK) return t;

  Location w;
  for (w[X]=x1; w[X]<=x2; ++(w[X]))
    for (w[Y]=y1; w[Y]<=y2; ++(w[Y]))
      for (w[Z]=z1; w[Z]<=z2; ++(w[Z]))
      {
        Token tmp;
        if (argc == 3) {
          tmp = evaluate_at(argv[2], upd_sheet, w);
        } else {
          tmp = recompvalue(upd_sheet, w);
        }
        updt(id, &l, &t, &w, &tmp);
        tfree_protected(&tmp, t);
        if (t.type == EEK) {
          /* Wrap the error so the message names the function and cell. */
          const char *templ = _("While computing %s() at &(%d,%d,%d): %s");
          Token report;
          report.type = EEK;
          report.u.err =
            malloc(strlen(templ) + MAX_FUNC_NAME_LENGTH + 3*20 + strlen(t.u.err));
          sprintf(report.u.err, templ, tfunc[id].name, w[X], w[Y], w[Z], t.u.err);
          tfree(&t);
          return report;
        }
      }
  if (finl != (RegFuncFinl)0) finl(id, &l, &t);
  return t;
}
static void sum_init(FunctionIdentifier id, Location *loc, Token *tok)
@ -1280,11 +1300,9 @@ static void minmax_finl(FunctionIdentifier id, Location *loc, Token *tok)
LOCATION_GETS(tok->u.location, *loc);
}
/* init_zero -- accumulator initialiser shared by the counting functions.
   Releases whatever the accumulator token currently holds and resets it
   to the integer 0.  id and loc are unused; there is no assertion on id
   because more than one function identifier uses this initialiser. */
static void init_zero(FunctionIdentifier id, Location *loc, Token *tok)
{
  tfree(tok);
  tok->type = INT;
  tok->u.integer = 0;
}
@ -1296,6 +1314,20 @@ static void n_updt(FunctionIdentifier id, Location *loc, Token *tok,
tok->u.integer += (newtok->type != EMPTY);
}
/* count_updt -- update step for count(): convert the new value with
   tbool() and add the resulting boolean to the integer accumulator.  If
   the conversion yields an error token, that error replaces the
   accumulator. */
static void count_updt(FunctionIdentifier id, Location *loc, Token *tok,
                       const Location* newloc, const Token *newtok)
{
  assert(id == FUNC_COUNT);
  Token truth = tbool(*newtok);
  if (truth.type != EEK) {
    assert(truth.type == BOOL);
    tok->u.integer += truth.u.bl;
    return;
  }
  /* conversion failed: hand the error back through the accumulator */
  tfree_protected(tok, truth);
  *tok = truth;
}
static Token reg_disp(FunctionIdentifier self, int argc, const Token argv[])
{
RegFuncInit i = 0;
@ -1308,10 +1340,12 @@ static Token reg_disp(FunctionIdentifier self, int argc, const Token argv[])
case FUNC_MAX:
u = minmax_updt; f = minmax_finl; break;
case FUNC_N:
i = n_init; u = n_updt; break;
i = init_zero; u = n_updt; break;
case FUNC_COUNT:
i = init_zero; u = count_updt; break;
default: assert(0);
}
return region_func(i, u, f, self, argc, argv);
return region_macro(i, u, f, self, argc, argv);
}
/* binop_func -- common implementation of all binary operations
@ -1702,8 +1736,8 @@ static Token negate_func(FunctionIdentifier self, int argc, const Token argv[])
}
/* table of functions */ /*{{{*/
/* The order of these entries has no influence on performance, but to stay
compatible, new entries should be appended. */
/* The order of these entries is irrelevant: each one is placed at the index
   given by its FUNC_XXX enum value. */
Tfunc tfunc[]=
{
/* Operators in order of increasing precedence */
@ -1782,10 +1816,11 @@ Tfunc tfunc[]=
[FUNC_CENTER] = { "center", self_func, PREFIX_FUNC, FUNCT, 0 },
/* Block operations */
[FUNC_MAX] = { "max", reg_disp, PREFIX_FUNC, FUNCT, 0 },
[FUNC_MIN] = { "min", reg_disp, PREFIX_FUNC, FUNCT, 0 },
[FUNC_N] = { "n", reg_disp, PREFIX_FUNC, FUNCT, 0 },
[FUNC_SUM] = { "sum", reg_disp, PREFIX_FUNC, FUNCT, 0 },
[FUNC_COUNT] = { "count", reg_disp, PREFIX_FUNC, MACRO, 0 },
[FUNC_MAX] = { "max", reg_disp, PREFIX_FUNC, MACRO, 0 },
[FUNC_MIN] = { "min", reg_disp, PREFIX_FUNC, MACRO, 0 },
[FUNC_N] = { "n", reg_disp, PREFIX_FUNC, MACRO, 0 },
[FUNC_SUM] = { "sum", reg_disp, PREFIX_FUNC, MACRO, 0 },
/* String functions */
[FUNC_LEN] = { "len", len_func, PREFIX_FUNC, FUNCT, 0 },

View File

@ -51,6 +51,8 @@ typedef enum
FUNC_DIM, FUNC_ITALIC,
FUNC_COUNT,
N_FUNCTION_IDS
} FunctionIdentifier;