mirror of
https://github.com/larksuite/cli.git
synced 2026-07-03 14:02:43 +08:00
perf(sheets): cap fan-out cell-matrix materialization to prevent OOM (#1578)
* perf(sheets): cap fan-out cell-matrix materialization to prevent OOM The +cells-set-style / +dropdown-set / +cells-batch-set-style / +dropdown-update shortcuts expand a single A1 range into a rows×cols matrix of per-cell maps client-side (the backing set_cell_range tool takes an explicit cells matrix). rangeDimensions() had no upper bound, so a tiny input like "A1:Z100000" balloons into ~2.6M heap maps (~900MB, doubled again by json.Marshal) and can OOM the process before the request is even sent. Add a 50000-cell safety cap (checkStampMatrixBudget) gating every fan-out materialization point, matching the documented but never-wired --max-cells default. Oversized ranges now fail fast with a clear validation error instead of allocating. Also preallocate the per-op slices now that the range count is known up front. Adds benchmarks + a boundary test as regression guards. * perf(sheets): cap table-put/batch fan-out materialization (siblings of the cell-matrix cap) The single-range fan-out cap (maxStampMatrixCells) left three sibling ingress paths uncapped, each able to materialize an unbounded matrix or op set in memory before the request leaves: - +table-put / +workbook-create --sheets/--values: buildSheetMatrix builds the whole rows×cols matrix before slicing it into per-write batches; tablePutMaxCellsPerWrite only bounds the batch size, not the total input. Add tablePayload.checkCellBudget (1M-cell guardrail), enforced in validate() and in buildValuesPayload (the --values path bypasses validate()). - batch fan-out (+cells-batch-set-style / +dropdown-update): per-range checkStampMatrixBudget can't stop many ranges from summing past the cap. Add an aggregate cell budget (checkBatchStampBudget) and a shared maxBatchRanges (100) count cap in validateDropdownRanges — covering all fan-out commands and replacing the now-redundant +dropdown-delete count check. - +batch-update: cap --operations at maxBatchOperations (100) in translateBatchOperations. Adds boundary regression tests for each cap. go vet + gofmt clean; full shortcuts/sheets + backward suites green. * test(sheets): measure table-put matrix materialization cost Add BenchmarkBuildSheetMatrix_* and TestTablePutMatrixPeakMemory mirroring the fan-out probes. Confirms the +table-put/+workbook-create ingress has the same OOM profile as the single-range stamp: 2.6M cells → ~917 MB / 5.3M allocs (+875 MB resident heap) materialized before the first write — now rejected up front by checkCellBudget.
This commit is contained in:
committed by
GitHub
parent
646304a1c7
commit
a179900d53
@@ -332,11 +332,21 @@ func translateBatchOp(raw interface{}, token string, index int) (map[string]inte
|
||||
}, nil
|
||||
}
|
||||
|
||||
// maxBatchOperations caps how many sub-operations a single +batch-update may
|
||||
// carry. Every translated op (with its own cells/properties payload) is held in
|
||||
// the out slice at once before the whole batch is marshaled, so an unbounded
|
||||
// operation count is the same unbounded-materialization hazard as the fan-out
|
||||
// matrix, on the operations axis.
|
||||
const maxBatchOperations = 100
|
||||
|
||||
// translateBatchOperations 翻译整个 ops 数组;fail-fast,遇错立即返回。
|
||||
func translateBatchOperations(rawOps []interface{}, token string) ([]interface{}, error) {
|
||||
if len(rawOps) == 0 {
|
||||
return nil, sheetsValidationForFlag("operations", "--operations must be a non-empty JSON array")
|
||||
}
|
||||
if len(rawOps) > maxBatchOperations {
|
||||
return nil, sheetsValidationForFlag("operations", "--operations accepts at most %d entries; got %d", maxBatchOperations, len(rawOps))
|
||||
}
|
||||
out := make([]interface{}, 0, len(rawOps))
|
||||
for i, raw := range rawOps {
|
||||
translated, err := translateBatchOp(raw, token, i)
|
||||
|
||||
@@ -1647,8 +1647,8 @@
|
||||
"kind": "own",
|
||||
"type": "int",
|
||||
"required": "optional",
|
||||
"desc": "Safety cap; default 50000",
|
||||
"default": "50000",
|
||||
"desc": "Safety cap; default 200000",
|
||||
"default": "200000",
|
||||
"hidden": true
|
||||
},
|
||||
{
|
||||
|
||||
@@ -138,7 +138,7 @@ var flagDefs = map[string]commandDef{
|
||||
{Name: "range", Kind: "own", Type: "string", Required: "required", Desc: "Write range (A1 notation)"},
|
||||
{Name: "cells", Kind: "own", Type: "string", Required: "required", Desc: "JSON 2D array `[[{cell},...],...]`, dimensions must match `--range`; each cell may carry `value` / `formula` / `cell_styles` / `note` / `rich_text` (incl. `type=\"embed-image\"` in-cell image); run `--print-schema` for full fields", Input: []string{"file", "stdin"}},
|
||||
{Name: "allow-overwrite", Kind: "own", Type: "bool", Required: "optional", Desc: "Allow overwriting non-empty cells (default true); set false to error if any target cell is non-empty", Default: "true"},
|
||||
{Name: "max-cells", Kind: "own", Type: "int", Required: "optional", Desc: "Safety cap; default 50000", Default: "50000", Hidden: true},
|
||||
{Name: "max-cells", Kind: "own", Type: "int", Required: "optional", Desc: "Safety cap; default 200000", Default: "200000", Hidden: true},
|
||||
{Name: "copy-to-range", Kind: "own", Type: "string", Required: "optional", Desc: "Copy-to range (A1 notation): replicate what --cells wrote into --range (values/formulas/styles, per the fields actually passed) to this range; formula refs auto-shift (C2=B2 -> C3=B3). Write a one-row/one-block template then fill a whole column/area. Supports full rows '3:6', full columns 'C:E', to-col-end 'D3:D', to-row-end 'D3:3', and comma-separated multiple targets like 'C1:D2,E5:F6'."},
|
||||
{Name: "dry-run", Kind: "system", Type: "bool", Required: "optional"},
|
||||
},
|
||||
|
||||
@@ -215,7 +215,8 @@ func cellsBatchSetStyleInput(runtime *common.RuntimeContext, token string) (map[
|
||||
if borderStyles != nil {
|
||||
prototype["border_styles"] = borderStyles
|
||||
}
|
||||
var ops []interface{}
|
||||
ops := make([]interface{}, 0, len(ranges))
|
||||
var totalCells int64
|
||||
for _, rng := range ranges {
|
||||
sheet, sub, err := splitSheetPrefixedRange(rng)
|
||||
if err != nil {
|
||||
@@ -225,6 +226,13 @@ func cellsBatchSetStyleInput(runtime *common.RuntimeContext, token string) (map[
|
||||
if err != nil {
|
||||
return nil, sheetsValidationForFlag("range", "range %q: %v", rng, err)
|
||||
}
|
||||
if err := checkStampMatrixBudget("ranges", rng, rows, cols); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
totalCells += int64(rows) * int64(cols)
|
||||
if err := checkBatchStampBudget(totalCells); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cells := fillCellsMatrix(rows, cols, prototype)
|
||||
ops = append(ops, map[string]interface{}{
|
||||
"tool_name": "set_cell_range",
|
||||
@@ -299,7 +307,7 @@ func cellsBatchClearInput(runtime *common.RuntimeContext, token string) (map[str
|
||||
return nil, err
|
||||
}
|
||||
clearType := normalizeClearType(runtime.Str("scope"))
|
||||
var ops []interface{}
|
||||
ops := make([]interface{}, 0, len(ranges))
|
||||
for _, rng := range ranges {
|
||||
sheet, sub, err := splitSheetPrefixedRange(rng)
|
||||
if err != nil {
|
||||
@@ -382,13 +390,10 @@ var DropdownDelete = common.Shortcut{
|
||||
if _, err := resolveSpreadsheetToken(runtime); err != nil {
|
||||
return err
|
||||
}
|
||||
ranges, err := validateDropdownRanges(runtime)
|
||||
if err != nil {
|
||||
// validateDropdownRanges enforces the shared maxBatchRanges cap.
|
||||
if _, err := validateDropdownRanges(runtime); err != nil {
|
||||
return err
|
||||
}
|
||||
if len(ranges) > 100 {
|
||||
return sheetsValidationForFlag("ranges", "--ranges accepts at most 100 entries; got %d", len(ranges))
|
||||
}
|
||||
return nil
|
||||
},
|
||||
DryRun: func(ctx context.Context, runtime *common.RuntimeContext) *common.DryRunAPI {
|
||||
@@ -432,7 +437,8 @@ func dropdownBatchInput(runtime *common.RuntimeContext, token string, clear bool
|
||||
}
|
||||
prototype = map[string]interface{}{"data_validation": validation}
|
||||
}
|
||||
var ops []interface{}
|
||||
ops := make([]interface{}, 0, len(ranges))
|
||||
var totalCells int64
|
||||
for _, rng := range ranges {
|
||||
sheet, sub, err := splitSheetPrefixedRange(rng)
|
||||
if err != nil {
|
||||
@@ -442,6 +448,13 @@ func dropdownBatchInput(runtime *common.RuntimeContext, token string, clear bool
|
||||
if err != nil {
|
||||
return nil, sheetsValidationForFlag("range", "range %q: %v", rng, err)
|
||||
}
|
||||
if err := checkStampMatrixBudget("ranges", rng, rows, cols); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
totalCells += int64(rows) * int64(cols)
|
||||
if err := checkBatchStampBudget(totalCells); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cells := fillCellsMatrix(rows, cols, prototype)
|
||||
ops = append(ops, map[string]interface{}{
|
||||
"tool_name": "set_cell_range",
|
||||
@@ -461,6 +474,25 @@ func dropdownBatchInput(runtime *common.RuntimeContext, token string, clear bool
|
||||
|
||||
// ─── helpers resurrected from B3 (used here + future skills) ──────────
|
||||
|
||||
// maxBatchRanges caps how many ranges a fan-out batch (+cells-batch-set-style /
|
||||
// +cells-batch-clear / +dropdown-update / +dropdown-delete) may carry, bounding
|
||||
// the number of ops materialized into one batch_update.
|
||||
const maxBatchRanges = 100
|
||||
|
||||
// checkBatchStampBudget rejects a fan-out batch whose ranges materialize more
|
||||
// than maxStampMatrixCells cells in aggregate. A batch builds every range's
|
||||
// cells matrix up front, so the SUM across ranges is the real peak-memory bound
|
||||
// — the per-range checkStampMatrixBudget alone can't stop many ranges from
|
||||
// summing past it. totalCells is int64 to stay overflow-safe.
|
||||
func checkBatchStampBudget(totalCells int64) error {
|
||||
if totalCells > maxStampMatrixCells {
|
||||
return sheetsValidationForFlag("ranges",
|
||||
"ranges expand to %d cells total, over the %d-cell safety cap; reduce the number or size of ranges",
|
||||
totalCells, maxStampMatrixCells)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// validateDropdownRanges parses --ranges, requires every entry to carry a
|
||||
// sheet prefix, and returns the parsed list.
|
||||
func validateDropdownRanges(runtime *common.RuntimeContext) ([]string, error) {
|
||||
@@ -490,6 +522,9 @@ func validateDropdownRanges(runtime *common.RuntimeContext) ([]string, error) {
|
||||
}
|
||||
out = append(out, s)
|
||||
}
|
||||
if len(out) > maxBatchRanges {
|
||||
return nil, sheetsValidationForFlag("ranges", "--ranges accepts at most %d entries; got %d", maxBatchRanges, len(out))
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -382,6 +382,32 @@ func (p *tablePayload) validate() error {
|
||||
return common.ValidationErrorf("--sheets[%d] %q: mode %q is invalid (want \"overwrite\" or \"append\")", i, s.Name, s.Mode)
|
||||
}
|
||||
}
|
||||
return p.checkCellBudget()
|
||||
}
|
||||
|
||||
// maxTablePutCells bounds how many cells a single +table-put / +workbook-create
|
||||
// write may materialize. Unlike the fan-out stamp cap (maxStampMatrixCells),
|
||||
// these cells come from the caller's own --sheets/--values payload rather than a
|
||||
// range blow-up, so this is a generous OOM guardrail, not a usability limit:
|
||||
// buildSheetMatrix builds the whole rows×cols matrix of per-cell maps in memory
|
||||
// before slicing it into tablePutMaxCellsPerWrite-sized writes, so an unbounded
|
||||
// payload (2.6M cells ≈ 900MB heap, doubled again by json.Marshal) OOMs the
|
||||
// process before the first write leaves.
|
||||
const maxTablePutCells = 1_000_000
|
||||
|
||||
// checkCellBudget rejects a payload whose total materialized cell count across
|
||||
// all sheets exceeds maxTablePutCells. Counted in int64 to stay overflow-safe on
|
||||
// pathological row/column counts.
|
||||
func (p *tablePayload) checkCellBudget() error {
|
||||
var total int64
|
||||
for i := range p.Sheets {
|
||||
total += int64(len(p.Sheets[i].Rows)) * int64(len(p.Sheets[i].Columns))
|
||||
}
|
||||
if total > maxTablePutCells {
|
||||
return common.ValidationErrorf(
|
||||
"--sheets/--values cover %d cells total, over the %d-cell safety cap; split the write across smaller payloads",
|
||||
total, maxTablePutCells)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -856,13 +856,19 @@ func buildValuesPayload(runtime flagView, sheetStyles *workbookCreateSheetStyles
|
||||
cols[i] = tableColumnSpec{Name: fmt.Sprintf("col%d", i+1)} // type-less
|
||||
}
|
||||
noHeader := false
|
||||
return &tablePayload{Sheets: []tableSheetSpec{{
|
||||
payload := &tablePayload{Sheets: []tableSheetSpec{{
|
||||
Name: valuesSheetName,
|
||||
Mode: "overwrite",
|
||||
Header: &noHeader,
|
||||
Columns: cols,
|
||||
Rows: rows,
|
||||
}}}, nil
|
||||
}}}
|
||||
// --values bypasses tablePayload.validate(), so enforce the cell budget here
|
||||
// too — otherwise a giant --values array materializes unbounded.
|
||||
if err := payload.checkCellBudget(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return payload, nil
|
||||
}
|
||||
|
||||
// parseValuesRows decodes --values (JSON 2D array, with @file/stdin already
|
||||
|
||||
@@ -165,6 +165,9 @@ func cellsSetStyleInput(runtime flagView, token, sheetID, sheetName string) (map
|
||||
if err != nil {
|
||||
return nil, sheetsValidationForFlag("range", "--range %q: %v", rangeStr, err)
|
||||
}
|
||||
if err := checkStampMatrixBudget("range", rangeStr, rows, cols); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := requireAnyStyleFlag(runtime); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -450,6 +453,9 @@ func dropdownSetInput(runtime flagView, token, sheetID, sheetName string) (map[s
|
||||
if err != nil {
|
||||
return nil, sheetsValidationForFlag("range", "--range %q: %v", rangeStr, err)
|
||||
}
|
||||
if err := checkStampMatrixBudget("range", rangeStr, rows, cols); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
validation, err := buildDropdownValidation(runtime)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -692,9 +698,30 @@ func letterToColumnIndex(letters string) int {
|
||||
return n - 1
|
||||
}
|
||||
|
||||
// maxStampMatrixCells bounds how many per-cell maps a fan-out / stamp shortcut
|
||||
// will materialize from a single A1 range. The backing tools take an explicit
|
||||
// cells matrix, so the CLI must expand a range like "A1:Z100000" into rows×cols
|
||||
// maps before sending it — an unbounded blow-up (2.6M cells ≈ 900MB heap, then
|
||||
// doubled again by json.Marshal) that OOMs the process before the request even
|
||||
// leaves. 200000 matches the documented --max-cells safety cap.
|
||||
const maxStampMatrixCells = 200000
|
||||
|
||||
// checkStampMatrixBudget rejects a range whose materialized cell count would
|
||||
// exceed maxStampMatrixCells, before fillCellsMatrix allocates it. rows*cols is
|
||||
// computed in int64 to stay safe against overflow on pathological ranges.
|
||||
func checkStampMatrixBudget(flagName, rangeStr string, rows, cols int) error {
|
||||
if total := int64(rows) * int64(cols); total > maxStampMatrixCells {
|
||||
return sheetsValidationForFlag(flagName,
|
||||
"range %q covers %d cells, over the %d-cell safety cap; narrow the range or split it across smaller ranges",
|
||||
rangeStr, total, maxStampMatrixCells)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fillCellsMatrix returns a rows×cols matrix where every cell is the same
|
||||
// (shallow-copied) prototype map. Use for fan-out shortcuts that stamp a
|
||||
// single attribute (style / data_validation) across an entire range.
|
||||
// Callers MUST gate the dimensions through checkStampMatrixBudget first.
|
||||
func fillCellsMatrix(rows, cols int, prototype map[string]interface{}) [][]interface{} {
|
||||
cells := make([][]interface{}, rows)
|
||||
for r := range cells {
|
||||
|
||||
272
shortcuts/sheets/sheets_perf_bench_test.go
Normal file
272
shortcuts/sheets/sheets_perf_bench_test.go
Normal file
@@ -0,0 +1,272 @@
|
||||
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package sheets
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// These benchmarks back the memory review of the sheets fan-out / download
|
||||
// paths. They measure two hot spots:
|
||||
//
|
||||
// 1. fillCellsMatrix — fan-out shortcuts (+cells-set-style, +dropdown-set,
|
||||
// +cells-batch-set-style, +dropdown-update) expand one A1 range into a
|
||||
// rows×cols matrix of per-cell maps. A tiny input string ("A1:Z100000")
|
||||
// explodes into millions of heap maps with no upper bound.
|
||||
//
|
||||
// 2. the export-download reader — strings.NewReader(string(rawBody)) copies
|
||||
// the whole downloaded file once more before saving it.
|
||||
//
|
||||
// Run: go test ./shortcuts/sheets -run XXX -bench 'FillCellsMatrix|DownloadReader' -benchmem
|
||||
|
||||
var styleProto = map[string]interface{}{
|
||||
"cell_styles": map[string]interface{}{"bold": true, "fg_color": "#FF0000"},
|
||||
"border_styles": map[string]interface{}{"top": map[string]interface{}{"style": "solid"}},
|
||||
}
|
||||
|
||||
func benchFillCellsMatrix(b *testing.B, rows, cols int) {
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
m := fillCellsMatrix(rows, cols, styleProto)
|
||||
if len(m) != rows {
|
||||
b.Fatalf("bad matrix")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkFillCellsMatrix_100(b *testing.B) { benchFillCellsMatrix(b, 10, 10) } // A1:J10
|
||||
func BenchmarkFillCellsMatrix_10K(b *testing.B) { benchFillCellsMatrix(b, 1000, 10) } // A1:J1000
|
||||
func BenchmarkFillCellsMatrix_100K(b *testing.B) { benchFillCellsMatrix(b, 10000, 10) } // A1:J10000
|
||||
func BenchmarkFillCellsMatrix_2600K(b *testing.B) { benchFillCellsMatrix(b, 100000, 26) } // A1:Z100000
|
||||
|
||||
// TestFanoutMatrixPeakMemory reports the concrete resident-heap delta of
|
||||
// materializing a large fan-out matrix, so the review doc can quote real MB.
|
||||
// Not an assertion — it prints numbers under `go test -v -run PeakMemory`.
|
||||
func TestFanoutMatrixPeakMemory(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping memory probe in -short")
|
||||
}
|
||||
cases := []struct {
|
||||
name string
|
||||
rows, cols int
|
||||
}{
|
||||
{"A1:Z10000 (260K cells)", 10000, 26},
|
||||
{"A1:Z100000 (2.6M cells)", 100000, 26},
|
||||
}
|
||||
for _, c := range cases {
|
||||
var before, after runtime.MemStats
|
||||
runtime.GC()
|
||||
runtime.ReadMemStats(&before)
|
||||
m := fillCellsMatrix(c.rows, c.cols, styleProto)
|
||||
runtime.ReadMemStats(&after)
|
||||
runtime.KeepAlive(m)
|
||||
t.Logf("%-26s heap +%6.1f MB (%d total allocs)",
|
||||
c.name,
|
||||
float64(after.HeapAlloc-before.HeapAlloc)/(1024*1024),
|
||||
after.Mallocs-before.Mallocs)
|
||||
}
|
||||
}
|
||||
|
||||
// --- +table-put / +workbook-create matrix materialization (sibling #1 path) ---
|
||||
//
|
||||
// buildSheetMatrix turns the caller's --sheets/--values into a rows×cols matrix
|
||||
// of per-cell maps, the same unbounded blow-up as fillCellsMatrix but on the
|
||||
// table-put ingress (tablePutMaxCellsPerWrite only slices the *write*, not this
|
||||
// in-memory build). checkCellBudget rejects oversized payloads before this runs.
|
||||
|
||||
func makeTypelessSpec(rows, cols int) *tableSheetSpec {
|
||||
c := make([]tableColumnSpec, cols)
|
||||
r := make([][]interface{}, rows)
|
||||
for i := range r {
|
||||
row := make([]interface{}, cols)
|
||||
for j := range row {
|
||||
row[j] = "x"
|
||||
}
|
||||
r[i] = row
|
||||
}
|
||||
return &tableSheetSpec{Columns: c, Rows: r}
|
||||
}
|
||||
|
||||
func benchBuildSheetMatrix(b *testing.B, rows, cols int) {
|
||||
spec := makeTypelessSpec(rows, cols)
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
m, err := buildSheetMatrix(spec, true)
|
||||
if err != nil || len(m) != rows+1 {
|
||||
b.Fatalf("bad matrix")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkBuildSheetMatrix_100K(b *testing.B) { benchBuildSheetMatrix(b, 10000, 10) } // 100K cells
|
||||
func BenchmarkBuildSheetMatrix_2600K(b *testing.B) { benchBuildSheetMatrix(b, 100000, 26) } // 2.6M cells
|
||||
|
||||
// TestTablePutMatrixPeakMemory reports the resident-heap delta of materializing
|
||||
// a large table-put matrix (the cost checkCellBudget now prevents), so the
|
||||
// review doc can quote real MB. Not an assertion — prints under -v -run PeakMemory.
|
||||
func TestTablePutMatrixPeakMemory(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping memory probe in -short")
|
||||
}
|
||||
for _, c := range []struct {
|
||||
name string
|
||||
rows, cols int
|
||||
}{
|
||||
{"100000×26 (2.6M cells)", 100000, 26},
|
||||
} {
|
||||
spec := makeTypelessSpec(c.rows, c.cols)
|
||||
var before, after runtime.MemStats
|
||||
runtime.GC()
|
||||
runtime.ReadMemStats(&before)
|
||||
m, _ := buildSheetMatrix(spec, true)
|
||||
runtime.ReadMemStats(&after)
|
||||
runtime.KeepAlive(m)
|
||||
t.Logf("%-24s buildSheetMatrix heap +%6.1f MB (%d total allocs)",
|
||||
c.name,
|
||||
float64(after.HeapAlloc-before.HeapAlloc)/(1024*1024),
|
||||
after.Mallocs-before.Mallocs)
|
||||
}
|
||||
}
|
||||
|
||||
// --- export-download reader copy ---
|
||||
|
||||
func benchDownloadReader(b *testing.B, size int, useStringCopy bool) {
|
||||
raw := bytes.Repeat([]byte("x"), size)
|
||||
sink := make([]byte, 32*1024)
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
var r io.Reader
|
||||
if useStringCopy {
|
||||
r = strings.NewReader(string(raw)) // current code: extra full-size copy
|
||||
} else {
|
||||
r = bytes.NewReader(raw) // fix: no copy
|
||||
}
|
||||
for {
|
||||
if _, err := r.Read(sink); err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --- fan-out cell-budget cap (fix for the unbounded matrix blow-up) ---
|
||||
|
||||
func TestStampMatrixBudgetCap(t *testing.T) {
|
||||
// 199992 cells (7692×26) sits just under the 200000 cap → allowed.
|
||||
if err := checkStampMatrixBudget("range", "A1:Z7692", 7692, 26); err != nil {
|
||||
t.Fatalf("199992 cells should pass, got: %v", err)
|
||||
}
|
||||
// Exactly at the cap → allowed.
|
||||
if err := checkStampMatrixBudget("range", "A1:A200000", 200000, 1); err != nil {
|
||||
t.Fatalf("200000 cells (== cap) should pass, got: %v", err)
|
||||
}
|
||||
// Just over the cap → rejected.
|
||||
if err := checkStampMatrixBudget("range", "A1:A200001", 200001, 1); err == nil {
|
||||
t.Fatal("200001 cells should be rejected")
|
||||
}
|
||||
// The pathological case from the review (2.6M cells) → rejected.
|
||||
if err := checkStampMatrixBudget("ranges", "Sheet1!A1:Z100000", 100000, 26); err == nil {
|
||||
t.Fatal("2.6M-cell fan-out should be rejected")
|
||||
}
|
||||
}
|
||||
|
||||
// --- sibling cap gaps: +table-put/+workbook-create payload, batch aggregate,
|
||||
// batch-update operation count (follow-up to the single fan-out cap) ---
|
||||
|
||||
// TestTablePutCellBudgetCap covers the --sheets/--values materialization cap:
|
||||
// buildSheetMatrix builds the whole matrix in memory, so the total cell count is
|
||||
// bounded before that allocation, summed across all sheets.
|
||||
func TestTablePutCellBudgetCap(t *testing.T) {
|
||||
// 1000×1000 = 1,000,000 == cap → allowed.
|
||||
atCap := &tablePayload{Sheets: []tableSheetSpec{{
|
||||
Columns: make([]tableColumnSpec, 1000),
|
||||
Rows: make([][]interface{}, 1000),
|
||||
}}}
|
||||
if err := atCap.checkCellBudget(); err != nil {
|
||||
t.Fatalf("1,000,000 cells (== cap) should pass, got: %v", err)
|
||||
}
|
||||
// 1000×1001 = 1,001,000 > cap → rejected.
|
||||
over := &tablePayload{Sheets: []tableSheetSpec{{
|
||||
Columns: make([]tableColumnSpec, 1000),
|
||||
Rows: make([][]interface{}, 1001),
|
||||
}}}
|
||||
if err := over.checkCellBudget(); err == nil {
|
||||
t.Fatal("1,001,000 cells should be rejected")
|
||||
}
|
||||
// Budget is summed across sheets, not per-sheet: 600k + 600k = 1.2M > cap.
|
||||
twoSheets := &tablePayload{Sheets: []tableSheetSpec{
|
||||
{Columns: make([]tableColumnSpec, 1000), Rows: make([][]interface{}, 600)},
|
||||
{Columns: make([]tableColumnSpec, 1000), Rows: make([][]interface{}, 600)},
|
||||
}}
|
||||
if err := twoSheets.checkCellBudget(); err == nil {
|
||||
t.Fatal("1.2M cells across two sheets should be rejected")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBatchStampAggregateCap covers the batch fan-out aggregate budget — the
|
||||
// per-range cap can't stop many ranges from summing past the matrix ceiling.
|
||||
func TestBatchStampAggregateCap(t *testing.T) {
|
||||
if err := checkBatchStampBudget(maxStampMatrixCells); err != nil {
|
||||
t.Fatalf("aggregate == cap should pass, got: %v", err)
|
||||
}
|
||||
if err := checkBatchStampBudget(maxStampMatrixCells + 1); err == nil {
|
||||
t.Fatal("aggregate over cap should be rejected")
|
||||
}
|
||||
}
|
||||
|
||||
// TestBatchFanoutRangeCountCap drives a fan-out shortcut with > maxBatchRanges
|
||||
// ranges and expects the shared validateDropdownRanges cap to reject it.
|
||||
func TestBatchFanoutRangeCountCap(t *testing.T) {
|
||||
ranges := make([]string, maxBatchRanges+1)
|
||||
for i := range ranges {
|
||||
ranges[i] = "sheet1!A1"
|
||||
}
|
||||
rangesJSON, _ := json.Marshal(ranges)
|
||||
_, _, err := runShortcutCapturingErr(t, CellsBatchSetStyle, []string{
|
||||
"--url", testURL,
|
||||
"--ranges", string(rangesJSON),
|
||||
"--font-weight", "bold",
|
||||
"--dry-run",
|
||||
})
|
||||
requireValidation(t, err, "at most")
|
||||
}
|
||||
|
||||
// TestBatchOperationsCountCap covers the +batch-update sub-operation count cap.
|
||||
func TestBatchOperationsCountCap(t *testing.T) {
|
||||
ops := make([]interface{}, maxBatchOperations+1)
|
||||
for i := range ops {
|
||||
ops[i] = map[string]interface{}{"shortcut": "+cells-set", "input": map[string]interface{}{}}
|
||||
}
|
||||
_, err := translateBatchOperations(ops, testURL)
|
||||
if err == nil || !strings.Contains(err.Error(), "at most") {
|
||||
t.Fatalf("expected operations count cap error, got: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// BenchmarkStampBudget_RejectsOversized is the "after" side of the fix: the same
|
||||
// A1:Z100000 input that BenchmarkFillCellsMatrix_2600K shows costing ~917MB /
|
||||
// 5.3M allocs is now rejected up front, allocating only the error string.
|
||||
func BenchmarkStampBudget_RejectsOversized(b *testing.B) {
|
||||
b.ReportAllocs()
|
||||
for i := 0; i < b.N; i++ {
|
||||
if err := checkStampMatrixBudget("range", "A1:Z100000", 100000, 26); err == nil {
|
||||
b.Fatal("expected rejection")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkDownloadReader_StringCopy_1MB(b *testing.B) { benchDownloadReader(b, 1<<20, true) }
|
||||
func BenchmarkDownloadReader_BytesNoCopy_1MB(b *testing.B) { benchDownloadReader(b, 1<<20, false) }
|
||||
func BenchmarkDownloadReader_StringCopy_16MB(b *testing.B) { benchDownloadReader(b, 16<<20, true) }
|
||||
func BenchmarkDownloadReader_BytesNoCopy_16MB(b *testing.B) {
|
||||
benchDownloadReader(b, 16<<20, false)
|
||||
}
|
||||
Reference in New Issue
Block a user