From 829da340100afa4ba84b77fab6c31d63069d3d63 Mon Sep 17 00:00:00 2001 From: Glen Whitney Date: Sun, 30 Apr 2023 12:05:51 -0400 Subject: [PATCH] fix: Always recompute cells without clocked expression with current values Prior to this change, when a cell was clocked, it was always computed with prior values of referred-to cells. This had the effect, for example, of making cells defined only with a base expression as a total of a column, for example, to be out-of-date in that they would take on the value of total of the prior values of the column, not the new current values of the column. This behavior was very counterintuitive. With this change, updates of cells that have no clocked expression are delayed until after clocked expressions have been recomputed based on prior values and their cells' current values have been updated. Then the computations of those base-only cells use all of the new current values, leaving the spreadsheet in a (maximally) self-consistent state. --- NEWS | 16 ++++- doc/teapot.lyx | 170 +++++++++++++++++++++++++++++++++++++++++---- src/common/cell.c | 5 ++ src/common/cell.h | 1 + src/common/sheet.c | 30 ++++++-- 5 files changed, 201 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index ca83cdd..ecf2df7 100644 --- a/NEWS +++ b/NEWS @@ -6,6 +6,13 @@ The changes compared to 2.3.0 are: o Added examples updating the life simulation to illustrate the color and computed style features. o You can compute the styles for cells, with an additional expression per cell. +o Computation of current values for cells without clocked expressions now + always depends on contemporaneous current values rather than prior values, + even when the sheet is clocked. This means, for example, an entry whose base + value is the sum of a column and which has no clocked expression will always resolve to the + current sum of that column, even if the column contains clocked values. 
+ (This is a breaking change in that prior to this modification, such a cell + when clocked would contain the sum of the prior values in that column.) o New token types: style and bool. o Comparison operators return bool rather than int (Note this can be a breaking change; you may need to wrap comparisons in int() if you are using the result @@ -21,8 +28,13 @@ o Addition of hexact float format, which allows for exact round trips to ASCII o New token type: funcall (which basically amounts to an expression). This allows parsed rather than unparsed expressions to be stored in cells, and allows macros which receive their arguments unevaluated. -o sum(), min(), and max() can now operate over their list of arguments as well - as over a block. +o New region/accumulator operation count() that counts truthy cells (rather + than just nonempty cells as n() does) added. +o All region/accumulator operations including sum(), min(), and max() can + now operate over their list of arguments as well as over a block. +o All region/accumulator operations can take an optional expression when + operating over a block that generates the values to be accumulated (rather + than just taking the values directly from the block). o Added floor(), ceil(), trunc(), and round() functions for finding integers associated with doubles, eliminating (note possible breaking change) the int conversion with two rounding directions. diff --git a/doc/teapot.lyx b/doc/teapot.lyx index 176babb..fab9eb1 100644 --- a/doc/teapot.lyx +++ b/doc/teapot.lyx @@ -346,7 +346,7 @@ name "fig:Three-Dimensional-Spread-Sheet" \end_inset -Three-Dimensional Spread Sheet Layout +Three-Dimensional Spreadsheet Layout \end_layout \end_inset @@ -794,7 +794,7 @@ The sheet is currently in reset condition and the result is 1. \begin_layout Standard After this introductory chapter, you should be familiar with the basic concepts - in spread sheets. + in spreadsheets. The next chapters explain all operations available in detail. 
You should read them to get an overview of the possibilities offered by @@ -4595,7 +4595,7 @@ CSV (.csv) \begin_layout Standard CSV (comma separated value) files only contain the data, not the expressions calculating it. - Many spread sheets can generate this file format and many graphics programs + Many spreadsheets can generate this file format and many graphics programs like gnuplot(1) can read it. The field separator usually is a tab or comma, strings may be enclosed in double quotes and decimal numbers have a dot to mark the fractional @@ -5315,34 +5315,170 @@ goto 5.12.4 \emph default . - \end_layout \begin_layout Section -Expressions +Sheets, Cells, and Clocking \end_layout \begin_layout Standard -Cells consist of a base (reset) expression, a clocked expression, a style - expression, and a current value. - If the sheet is currently in the reset state (the default), all cells display - their base value as current value. +A +\emph on +sheet +\emph default + is a three-dimensional collection of +\emph on +cells +\emph default +. + The slices in the X, Y, and Z directions are called the +\emph on +columns +\emph default +, +\emph on +rows +\emph default +, and +\emph on +layers +\emph default +, respectively. + At any given time, the valid coordinates for a sheet are all non-negative + integers up to some current limit in each of the X, Y, and Z directions, + which may be different in each direction. + Generally speaking, direct references to cells with negative coordinates + are an error, whereas indirect references to cells with negative coordinates + and all references to cells with coordinates beyond the current limits + act as references to empty cells with all default attributes/styles. \end_layout \begin_layout Standard -When the sheet is clocked (see Table +Each cell has three expressions, all of which are optional: a base (reset) + expression, a clocked expression, and a style expression. + It also has an associated current value. 
+ See section +\begin_inset CommandInset ref +LatexCommand ref +reference "sec:Expressions" +plural "false" +caps "false" +noprefix "false" + +\end_inset + + for the semantics of expressions and values. + When the sheet is in the reset state (the default/initial state), all cells + set their current values to the value of their base expression; references + to other cells use those other cells' current values. + The recomputation of current values updates referred-to cells first in + an effort to produce the expected, correct values. + However, if there are circular references among cells, this update process + does not attempt to produce a fully self-consistent state, in that a given + cell is only updated once. + Hence the resulting current values can depend on the order updates occur, + and it may happen that a cell's current value is not equal to the value + of its base expression computed with current values of all cells. + This behavior may seem problematic, but in fact it's not always possible + to produce a self-consistent state. + The simplest example is if the base expression for the cell with coordinates + (0,0,0) is @(0,0,0)+1, since no number is one more than itself. + Moreover, in the absence of circular reference chains among the base expression +s, the recomputation of current values is guaranteed to reach a consistent + state for all cells. + The bottom line is that although circular references in base expressions + are not disallowed, their utility may be limited because their behavior + is relatively unpredictable. +\end_layout + +\begin_layout Standard +When a sheet is clocked (see Table \begin_inset CommandInset ref LatexCommand ref reference "tab:Key-Bindings-in" \end_inset -), the clocked expression is evaluated, using the current value of referenced - cells. - The new current value is the result of that evaluation. +), every cell with a clocked expression is clocked. 
+ When a cell is clocked (either because the sheet was clocked or by virtue + of the clock() function documented in section +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:All-other-functions" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +), its clocked expression is evaluated using the current value of all cells + referenced directly or indirectly, including cells that are also currently + being clocked. + Then the new current value of (all of the) clocked cell(s) is set to the + result of that evaluation. + Finally, the current values of the unclocked cells (which include any cells + that have no clocked expression) are recomputed based on the new current + values. + The upshot of these rules is that one can control the updating of potentially + circular references precisely: the computation of clocked expressions always + uses values of all cells, including other clocked cells, from the previous + +\begin_inset Quotes eld +\end_inset + +tick +\begin_inset Quotes erd +\end_inset + +, and the computation of base expressions uses values of all cells, including + other unclocked cells, from the current tick. +\end_layout + +\begin_layout Standard +Finally, whenever the style of a given cell is needed, for example to display + the cell, its style expression is evaluated based on the current values + of all cells to which it refers directly or indirectly. + The expression must evaluate to a style value (see subsection +\begin_inset CommandInset ref +LatexCommand ref +reference "subsec:Data-Types" +plural "false" +caps "false" +noprefix "false" + +\end_inset + +) and that value provides the style of the cell. 
+\end_layout + +\begin_layout Section +\begin_inset CommandInset label +LatexCommand label +name "sec:Expressions" + +\end_inset + +Expressions and values +\end_layout + +\begin_layout Standard +In +\noun on +teapot +\noun default +, values are specific data items and expressions are the formulas that can + be evaluated to produce those values. + This section describes the possible values, the allowed syntax of expressions, + and the semantics of valid expressions. \end_layout \begin_layout Subsection +\begin_inset CommandInset label +LatexCommand label +name "subsec:Data-Types" + +\end_inset + Data Types \end_layout @@ -6996,6 +7132,12 @@ sum Returns the sum of the accumulated values (recall for strings this is \end_layout \begin_layout Subsubsection +\begin_inset CommandInset label +LatexCommand label +name "subsec:All-other-functions" + +\end_inset + All other functions \end_layout @@ -10123,7 +10265,7 @@ How do I hide intermediate results? \end_layout \begin_layout Standard -If you used flat, two-dimensional spread sheets before, you are probably +If you used flat, two-dimensional spreadsheets before, you are probably used to hidden cells which contain intermediate results, global constants, scratch areas and the like. diff --git a/src/common/cell.c b/src/common/cell.c index efc983e..2c921af 100644 --- a/src/common/cell.c +++ b/src/common/cell.c @@ -50,6 +50,11 @@ Token gettok(const Cell *cell, TokVariety v) return cell->tok[v]; } +/* iterable -- does cell have iteration content? */ +bool iterable(const Cell* cell) { + return (cell != NULLCELL) && cell->tok[ITER_CONT].type != EMPTY; +} + /* locked -- is cell locked? 
*/ bool locked(const Cell *cell) { diff --git a/src/common/cell.h b/src/common/cell.h index 40c955b..72f5e82 100644 --- a/src/common/cell.h +++ b/src/common/cell.h @@ -32,6 +32,7 @@ typedef struct typedef enum {ALTER_LABEL, PRESERVE_LABEL} LabelHandling; Token gettok(const Cell *cell, TokVariety v); +bool iterable(const Cell *cell); bool locked(const Cell *cell); bool ignored(const Cell *cell); const char *getlabel(const Cell *cell); diff --git a/src/common/sheet.c b/src/common/sheet.c index 5b26b02..c728863 100644 --- a/src/common/sheet.c +++ b/src/common/sheet.c @@ -692,14 +692,33 @@ void update(Sheet *sheet) upd_clock = true; recompvalue(sheet, w); } + bool current_changed = false; for (ALL_CELLS_IN_SHEET(sheet,i,cell)) { if (cell && cell->clock_resolving) { tfree(&(cell->tok[CURR_VAL])); cell->tok[CURR_VAL] = cell->tok[RES_VAL];; - cell->tok[RES_VAL].type = EMPTY; - cell->clock_resolving = false; + cell->tok[RES_VAL].type = EMPTY; + current_changed = true; + } + } + if (current_changed) { + /* Recompute all of the non-clocked cells so that they get the values + of the newly-changed clocked cells + */ + for (ALL_CELLS_IN_SHEET(sheet, i, cell)) { + if (cell) { + if (cell->clock_resolving) { + cell->clock_resolving = false; + } else { + cell->updated = false; + } + } + } + for (ALL_LOCS_IN_SHEET(sheet, w)) { + upd_clock = true; + recompvalue(sheet, w); } } upd_clock = false; @@ -905,10 +924,11 @@ void clk(Sheet *sheet, const Location at) { assert(sheet != (Sheet*)0); assert(IN_OCTANT(at)); - assert(LOC_WITHIN(sheet,at)); - if (CELL_AT(sheet,at)) + assert(LOC_WITHIN(sheet, at)); + Cell* toclock = CELL_AT(sheet, at); + if (iterable(toclock)) { - CELL_AT(sheet,at)->clock_requested = true; + toclock->clock_requested = true; sheet->clk = true; } }