From 086876d2720c90ee49172f84eba39c8a08d5db45 Mon Sep 17 00:00:00 2001 From: xiongyuanwen-byted Date: Wed, 24 Jun 2026 13:10:39 +0800 Subject: [PATCH] fix(sheets): harden +table-put / +table-get input validation and round-trip safety MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four review-flagged correctness gaps in table I/O, all bundled because they touch the same file: 1. --sheets accepted trailing data after the first JSON value (json.Decoder does not surface that, unlike json.Unmarshal). A new decoderExpectEOF helper rejects e.g. `--sheets '{...} oops'` with a typed validation error instead of letting the leading object pass through and surface as a confusing downstream failure. 2. +table-get with a duplicate header (e.g. `amount, amount`) used to read back successfully — the dtypes map silently collapsed to one entry — and only failed later on +table-put because the writer rejects duplicate column names. Fail fast at read time with an actionable hint to rename or pass --no-header. --no-header mode is exempt (fallback col names are always unique). 3. +table-put dry-run rendered an invalid range like A1:C0 when header=false with rows=[]. tablePutFullRange returns "" for an empty matrix or zero columns instead of building a degenerate rectangle. 4. +table-get with --sheet-id and a get_workbook_structure miss (read failure or selector mismatch) used to return a target with name="", which then broke +table-get → +table-put round-trip (the writer requires a non-empty sheet name). Fall back to using the id as the name. End-to-end verified against a real spreadsheet: trailing data, duplicate header, and --no-header fallback all behave as advertised. --- shortcuts/sheets/lark_sheet_table_io.go | 55 +++++++++++++-- shortcuts/sheets/lark_sheet_table_io_test.go | 72 ++++++++++++++++++++ 2 files changed, 123 insertions(+), 4 deletions(-) diff --git a/shortcuts/sheets/lark_sheet_table_io.go b/shortcuts/sheets/lark_sheet_table_io.go index fc1346e8..4f40ff71 100644 --- a/shortcuts/sheets/lark_sheet_table_io.go +++ b/shortcuts/sheets/lark_sheet_table_io.go @@ -6,7 +6,9 @@ package sheets import ( "context" "encoding/json" + "errors" "fmt" + "io" "math" "strconv" "strings" @@ -232,6 +234,21 @@ func typeToDtype(typ string) string { } } +// decoderExpectEOF ensures the decoder has nothing left to read after a +// successful Decode. json.Decoder accepts trailing non-whitespace after the +// first JSON value (unlike json.Unmarshal), so a payload like `{...} trailing` +// would silently be treated as the leading object only. Use this after the +// first Decode to surface the trailing data as a validation error. +func decoderExpectEOF(dec *json.Decoder) error { + var trailing json.RawMessage + if err := dec.Decode(&trailing); err == nil { + return fmt.Errorf("trailing data after JSON value") //nolint:forbidigo // intermediate error; the caller wraps it into a typed --sheets/--values validation error + } else if !errors.Is(err, io.EOF) { + return fmt.Errorf("trailing data after JSON value: %w", err) //nolint:forbidigo // intermediate error; the caller wraps it into a typed --sheets/--values validation error + } + return nil +} + // parseTablePutPayload reads --sheets (JSON, supports @file / stdin) into a // validated payload. UseNumber keeps numeric cells as json.Number so large // integers (order IDs, etc.) survive without precision loss or scientific @@ -252,6 +269,12 @@ func parseTablePutPayload(runtime flagView) (*tablePayload, error) { if err := dec.Decode(&wire); err != nil { return nil, common.ValidationErrorf("--sheets: invalid JSON: %w", err) } + // Reject trailing non-whitespace after the first JSON value: json.Decoder + // accepts it silently (unlike json.Unmarshal), so e.g. `--sheets '{...} oops'` + // would otherwise pass Validate and surface as confusing downstream errors. + if err := decoderExpectEOF(dec); err != nil { + return nil, common.ValidationErrorf("--sheets: %v", err).WithCause(err) + } p := &tablePayload{Sheets: make([]tableSheetSpec, 0, len(wire.Sheets))} for i := range wire.Sheets { spec, err := wire.Sheets[i].normalize(i) @@ -579,8 +602,13 @@ func sheetAnchor(s *tableSheetSpec) (anchor string, col0, row0 int, err error) { } // tablePutFullRange is the A1 rectangle the whole matrix (header + data) -// occupies, for reporting in the result / dry-run. +// occupies, for reporting in the result / dry-run. Returns "" when there is +// nothing to write (e.g. header=false with no data rows) — the previous +// formula would have produced an invalid trailing row like "A1:C0". func tablePutFullRange(s *tableSheetSpec, totalRows int) string { + if totalRows <= 0 || len(s.Columns) == 0 { + return "" + } _, col0, row0, err := sheetAnchor(s) if err != nil { return strings.TrimSpace(s.StartCell) @@ -1124,8 +1152,11 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token // Single-sheet selector path can degrade gracefully without dimensions // (the probe falls back to the A1 anchor); the whole-workbook path can't // enumerate sheets without the structure, so it must surface the error. + // Name doubles as id for --sheet-id so the output spec is never nameless + // (an empty name would break +table-get → +table-put round-trip — the + // writer requires a non-empty sheet name). if id != "" { - return []tableGetSheet{{id: id}}, nil + return []tableGetSheet{{id: id, name: id}}, nil } if name != "" { return []tableGetSheet{{name: name}}, nil @@ -1138,7 +1169,8 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token // Selector path: find the matching sheet to pick up its dimensions (and // backfill name when only --sheet-id was given). If no row matches, fall // back to a dimensionless target so the read still proceeds via the A1 - // anchor rather than erroring on a structure/selector mismatch. + // anchor rather than erroring on a structure/selector mismatch — same + // id-as-name backfill applies so the output spec round-trips. if id != "" || name != "" { for _, r := range raw { sid, sname, rc, cc := tableGetSheetMeta(r) @@ -1147,7 +1179,7 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token } } if id != "" { - return []tableGetSheet{{id: id}}, nil + return []tableGetSheet{{id: id, name: id}}, nil } return []tableGetSheet{{name: name}}, nil } @@ -1254,10 +1286,25 @@ func readSheetAsSpec(ctx context.Context, runtime *common.RuntimeContext, token colTypes := make([]string, ncols) dtypes := make(map[string]interface{}, ncols) formats := map[string]interface{}{} + // Duplicate header names break +table-get → +table-put round-trip: the dtypes + // map (keyed by name) silently collapses to a single entry and the writer + // later rejects the duplicate columns in --sheets validation. Fail fast with + // an actionable hint when the source sheet actually has duplicate headers. + // noHeader mode is exempt because tableGetColumnName falls back to positional + // col names which are always unique. + seenNames := map[string]int{} for c := 0; c < ncols; c++ { typ, format := inferColumnType(dataRows, c) colTypes[c] = typ name := tableGetColumnName(headerRow, c, noHeader) + if !noHeader { + if prev, dup := seenNames[name]; dup { + return nil, common.ValidationErrorf( + "sheet %q: duplicate header column name %q at columns %d and %d; this would break the +table-get → +table-put round-trip. Rename the headers or pass --no-header to read by position (col1/col2/…).", + t.name, name, prev+1, c+1) + } + seenNames[name] = c + } columnNames[c] = name dtypes[name] = typeToDtype(typ) // Only emit a format when the column actually has one and it's not the diff --git a/shortcuts/sheets/lark_sheet_table_io_test.go b/shortcuts/sheets/lark_sheet_table_io_test.go index 5f398eeb..ab0fdafe 100644 --- a/shortcuts/sheets/lark_sheet_table_io_test.go +++ b/shortcuts/sheets/lark_sheet_table_io_test.go @@ -460,6 +460,11 @@ func TestTablePut_Validation(t *testing.T) { args: []string{"--url", testURL, "--sheets", `{"sheets":[{"name":"S","columns":["a","b"],"data":[["only-one"]]}]}`}, want: "column count", }, + { + name: "trailing JSON data after --sheets value rejected", + args: []string{"--url", testURL, "--sheets", `{"sheets":[{"name":"S","columns":["a"],"data":[]}]} trailing`}, + want: "trailing data after JSON value", + }, } for _, tt := range cases { t.Run(tt.name, func(t *testing.T) { @@ -1191,6 +1196,73 @@ func TestTableGet_ExecuteRoundTrip(t *testing.T) { } } +// TestTableGet_DuplicateHeaderRejected covers a fangshuyu-flagged round-trip +// hole: a sheet with two columns named the same (e.g. `amount, amount`) used +// to read back successfully — with the dtypes map silently collapsing to a +// single entry — and then fail on the immediate +table-put write because the +// writer rejects duplicate column names. Fail fast at read time with an +// actionable hint to rename or pass --no-header instead. +func TestTableGet_DuplicateHeaderRejected(t *testing.T) { + t.Parallel() + structure := toolOutputStub(testToken, "read", `{"sheets":[{"sheet_id":"`+testSheetID+`","sheet_name":"S","row_count":200,"column_count":20,"index":0}]}`) + region := toolOutputStub(testToken, "read", `{"current_region":"A1:B2"}`) + cells := toolOutputStub(testToken, "read", `{"ranges":[{"cells":[`+ + `[{"value":"amount"},{"value":"amount"}],`+ + `[{"value":1},{"value":2}]`+ + `]}]}`) + out, err := runShortcutWithStubs(t, TableGet, + []string{"--url", testURL, "--sheet-name", "S"}, structure, region, cells) + if err == nil { + t.Fatalf("expected validation error for duplicate header; got nil. out=%s", out) + } + combined := out + err.Error() + if !strings.Contains(combined, "duplicate header column name") { + t.Errorf("error should flag duplicate header; got=%s", combined) + } + if !strings.Contains(combined, "--no-header") { + t.Errorf("error should hint about --no-header; got=%s", combined) + } +} + +// TestTableGet_SheetIDFallbackBackfillsName covers the fallback path: when +// get_workbook_structure cannot enumerate the target row (e.g. selector +// mismatch), the target name was previously left empty — which would later +// fail the +table-put round-trip because the writer requires a non-empty +// sheet name. The target should fall back to using the id as the name. +func TestTableGet_SheetIDFallbackBackfillsName(t *testing.T) { + t.Parallel() + // Structure has a different sheet — selector mismatch triggers the fallback. + structure := toolOutputStub(testToken, "read", `{"sheets":[{"sheet_id":"shtOther","sheet_name":"另一张","row_count":200,"column_count":20,"index":0}]}`) + region := toolOutputStub(testToken, "read", `{"current_region":"A1:A2"}`) + cells := toolOutputStub(testToken, "read", `{"ranges":[{"cells":[[{"value":"h"}],[{"value":"x"}]]}]}`) + out, err := runShortcutWithStubs(t, TableGet, + []string{"--url", testURL, "--sheet-id", testSheetID}, structure, region, cells) + if err != nil { + t.Fatalf("execute failed: %v\nout=%s", err, out) + } + data := decodeEnvelopeData(t, out) + s0, _ := data["sheets"].([]interface{})[0].(map[string]interface{}) + if s0["name"] != testSheetID { + t.Errorf("fallback name = %v, want %q (id used as name)", s0["name"], testSheetID) + } +} + +// TestTablePutFullRange_EmptyMatrix covers the dry-run report for the +// header=false + rows=[] case. Previously totalRows=0 produced an invalid +// "A1:C0"; the helper now returns an empty range string instead. +func TestTablePutFullRange_EmptyMatrix(t *testing.T) { + t.Parallel() + s := &tableSheetSpec{Name: "S", Columns: []tableColumnSpec{{Name: "a"}, {Name: "b"}, {Name: "c"}}, StartCell: "A1"} + if got := tablePutFullRange(s, 0); got != "" { + t.Errorf("tablePutFullRange(_, 0) = %q, want \"\" (no invalid A1:C0)", got) + } + // Sanity check: positive totals still produce a real rectangle so we + // haven't broken the happy path. + if got := tablePutFullRange(s, 2); got != "A1:C2" { + t.Errorf("tablePutFullRange(_, 2) = %q, want A1:C2", got) + } +} + // TestTableGet_OutputRoundTripsBackIntoTablePut is the contract test: the // output of +table-get must be a payload +table-put accepts. This catches // dtype/format symmetry breaks early — if the reader ever emits a dtype the