fix(sheets): harden +table-put / +table-get input validation and round-trip safety

Four review-flagged correctness gaps in table I/O, all bundled because
they touch the same file:

1. --sheets accepted trailing data after the first JSON value
   (json.Decoder does not surface that, unlike json.Unmarshal). A new
   decoderExpectEOF helper rejects e.g. `--sheets '{...} oops'` with a
   typed validation error instead of letting the leading object pass
   through and surface as a confusing downstream failure.

2. +table-get with a duplicate header (e.g. `amount, amount`) used to
   read back successfully — the dtypes map silently collapsed to one
   entry — and only failed later on +table-put because the writer
   rejects duplicate column names. Fail fast at read time with an
   actionable hint to rename or pass --no-header. --no-header mode is
   exempt (fallback col<N> names are always unique).

3. +table-put dry-run rendered an invalid range like A1:C0 when
   header=false with rows=[]. tablePutFullRange returns "" for an
   empty matrix or zero columns instead of building a degenerate
   rectangle.

4. +table-get with --sheet-id and a get_workbook_structure miss (read
   failure or selector mismatch) used to return a target with
   name="", which then broke +table-get → +table-put round-trip (the
   writer requires a non-empty sheet name). Fall back to using the id
   as the name.

End-to-end verified against a real spreadsheet: trailing data, duplicate
header, and --no-header fallback all behave as advertised.
This commit is contained in:
xiongyuanwen-byted
2026-06-24 13:10:39 +08:00
parent d994c27819
commit 086876d272
2 changed files with 123 additions and 4 deletions

View File

@@ -6,7 +6,9 @@ package sheets
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"math"
"strconv"
"strings"
@@ -232,6 +234,21 @@ func typeToDtype(typ string) string {
}
}
// decoderExpectEOF ensures the decoder has nothing left to read after a
// successful Decode. json.Decoder accepts trailing non-whitespace after the
// first JSON value (unlike json.Unmarshal), so a payload like `{...} trailing`
// would silently be treated as the leading object only. Use this after the
// first Decode to surface the trailing data as a validation error.
func decoderExpectEOF(dec *json.Decoder) error {
var trailing json.RawMessage
if err := dec.Decode(&trailing); err == nil {
return fmt.Errorf("trailing data after JSON value") //nolint:forbidigo // intermediate error; the caller wraps it into a typed --sheets/--values validation error
} else if !errors.Is(err, io.EOF) {
return fmt.Errorf("trailing data after JSON value: %w", err) //nolint:forbidigo // intermediate error; the caller wraps it into a typed --sheets/--values validation error
}
return nil
}
// parseTablePutPayload reads --sheets (JSON, supports @file / stdin) into a
// validated payload. UseNumber keeps numeric cells as json.Number so large
// integers (order IDs, etc.) survive without precision loss or scientific
@@ -252,6 +269,12 @@ func parseTablePutPayload(runtime flagView) (*tablePayload, error) {
if err := dec.Decode(&wire); err != nil {
return nil, common.ValidationErrorf("--sheets: invalid JSON: %w", err)
}
// Reject trailing non-whitespace after the first JSON value: json.Decoder
// accepts it silently (unlike json.Unmarshal), so e.g. `--sheets '{...} oops'`
// would otherwise pass Validate and surface as confusing downstream errors.
if err := decoderExpectEOF(dec); err != nil {
return nil, common.ValidationErrorf("--sheets: %v", err).WithCause(err)
}
p := &tablePayload{Sheets: make([]tableSheetSpec, 0, len(wire.Sheets))}
for i := range wire.Sheets {
spec, err := wire.Sheets[i].normalize(i)
@@ -579,8 +602,13 @@ func sheetAnchor(s *tableSheetSpec) (anchor string, col0, row0 int, err error) {
}
// tablePutFullRange is the A1 rectangle the whole matrix (header + data)
// occupies, for reporting in the result / dry-run.
// occupies, for reporting in the result / dry-run. Returns "" when there is
// nothing to write (e.g. header=false with no data rows) — the previous
// formula would have produced an invalid trailing row like "A1:C0".
func tablePutFullRange(s *tableSheetSpec, totalRows int) string {
if totalRows <= 0 || len(s.Columns) == 0 {
return ""
}
_, col0, row0, err := sheetAnchor(s)
if err != nil {
return strings.TrimSpace(s.StartCell)
@@ -1124,8 +1152,11 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token
// Single-sheet selector path can degrade gracefully without dimensions
// (the probe falls back to the A1 anchor); the whole-workbook path can't
// enumerate sheets without the structure, so it must surface the error.
// Name doubles as id for --sheet-id so the output spec is never nameless
// (an empty name would break +table-get → +table-put round-trip — the
// writer requires a non-empty sheet name).
if id != "" {
return []tableGetSheet{{id: id}}, nil
return []tableGetSheet{{id: id, name: id}}, nil
}
if name != "" {
return []tableGetSheet{{name: name}}, nil
@@ -1138,7 +1169,8 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token
// Selector path: find the matching sheet to pick up its dimensions (and
// backfill name when only --sheet-id was given). If no row matches, fall
// back to a dimensionless target so the read still proceeds via the A1
// anchor rather than erroring on a structure/selector mismatch.
// anchor rather than erroring on a structure/selector mismatch — same
// id-as-name backfill applies so the output spec round-trips.
if id != "" || name != "" {
for _, r := range raw {
sid, sname, rc, cc := tableGetSheetMeta(r)
@@ -1147,7 +1179,7 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token
}
}
if id != "" {
return []tableGetSheet{{id: id}}, nil
return []tableGetSheet{{id: id, name: id}}, nil
}
return []tableGetSheet{{name: name}}, nil
}
@@ -1254,10 +1286,25 @@ func readSheetAsSpec(ctx context.Context, runtime *common.RuntimeContext, token
colTypes := make([]string, ncols)
dtypes := make(map[string]interface{}, ncols)
formats := map[string]interface{}{}
// Duplicate header names break +table-get → +table-put round-trip: the dtypes
// map (keyed by name) silently collapses to a single entry and the writer
// later rejects the duplicate columns in --sheets validation. Fail fast with
// an actionable hint when the source sheet actually has duplicate headers.
// noHeader mode is exempt because tableGetColumnName falls back to positional
// col<N> names which are always unique.
seenNames := map[string]int{}
for c := 0; c < ncols; c++ {
typ, format := inferColumnType(dataRows, c)
colTypes[c] = typ
name := tableGetColumnName(headerRow, c, noHeader)
if !noHeader {
if prev, dup := seenNames[name]; dup {
return nil, common.ValidationErrorf(
"sheet %q: duplicate header column name %q at columns %d and %d; this would break the +table-get → +table-put round-trip. Rename the headers or pass --no-header to read by position (col1/col2/…).",
t.name, name, prev+1, c+1)
}
seenNames[name] = c
}
columnNames[c] = name
dtypes[name] = typeToDtype(typ)
// Only emit a format when the column actually has one and it's not the

View File

@@ -460,6 +460,11 @@ func TestTablePut_Validation(t *testing.T) {
args: []string{"--url", testURL, "--sheets", `{"sheets":[{"name":"S","columns":["a","b"],"data":[["only-one"]]}]}`},
want: "column count",
},
{
name: "trailing JSON data after --sheets value rejected",
args: []string{"--url", testURL, "--sheets", `{"sheets":[{"name":"S","columns":["a"],"data":[]}]} trailing`},
want: "trailing data after JSON value",
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
@@ -1191,6 +1196,73 @@ func TestTableGet_ExecuteRoundTrip(t *testing.T) {
}
}
// TestTableGet_DuplicateHeaderRejected covers a fangshuyu-flagged round-trip
// hole: a sheet with two columns named the same (e.g. `amount, amount`) used
// to read back successfully — with the dtypes map silently collapsing to a
// single entry — and then fail on the immediate +table-put write because the
// writer rejects duplicate column names. Fail fast at read time with an
// actionable hint to rename or pass --no-header instead.
func TestTableGet_DuplicateHeaderRejected(t *testing.T) {
t.Parallel()
structure := toolOutputStub(testToken, "read", `{"sheets":[{"sheet_id":"`+testSheetID+`","sheet_name":"S","row_count":200,"column_count":20,"index":0}]}`)
region := toolOutputStub(testToken, "read", `{"current_region":"A1:B2"}`)
cells := toolOutputStub(testToken, "read", `{"ranges":[{"cells":[`+
`[{"value":"amount"},{"value":"amount"}],`+
`[{"value":1},{"value":2}]`+
`]}]}`)
out, err := runShortcutWithStubs(t, TableGet,
[]string{"--url", testURL, "--sheet-name", "S"}, structure, region, cells)
if err == nil {
t.Fatalf("expected validation error for duplicate header; got nil. out=%s", out)
}
combined := out + err.Error()
if !strings.Contains(combined, "duplicate header column name") {
t.Errorf("error should flag duplicate header; got=%s", combined)
}
if !strings.Contains(combined, "--no-header") {
t.Errorf("error should hint about --no-header; got=%s", combined)
}
}
// TestTableGet_SheetIDFallbackBackfillsName covers the fallback path: when
// get_workbook_structure cannot enumerate the target row (e.g. selector
// mismatch), the target name was previously left empty — which would later
// fail the +table-put round-trip because the writer requires a non-empty
// sheet name. The target should fall back to using the id as the name.
func TestTableGet_SheetIDFallbackBackfillsName(t *testing.T) {
t.Parallel()
// Structure has a different sheet — selector mismatch triggers the fallback.
structure := toolOutputStub(testToken, "read", `{"sheets":[{"sheet_id":"shtOther","sheet_name":"另一张","row_count":200,"column_count":20,"index":0}]}`)
region := toolOutputStub(testToken, "read", `{"current_region":"A1:A2"}`)
cells := toolOutputStub(testToken, "read", `{"ranges":[{"cells":[[{"value":"h"}],[{"value":"x"}]]}]}`)
out, err := runShortcutWithStubs(t, TableGet,
[]string{"--url", testURL, "--sheet-id", testSheetID}, structure, region, cells)
if err != nil {
t.Fatalf("execute failed: %v\nout=%s", err, out)
}
data := decodeEnvelopeData(t, out)
s0, _ := data["sheets"].([]interface{})[0].(map[string]interface{})
if s0["name"] != testSheetID {
t.Errorf("fallback name = %v, want %q (id used as name)", s0["name"], testSheetID)
}
}
// TestTablePutFullRange_EmptyMatrix covers the dry-run report for the
// header=false + rows=[] case. Previously totalRows=0 produced an invalid
// "A1:C0"; the helper now returns an empty range string instead.
func TestTablePutFullRange_EmptyMatrix(t *testing.T) {
t.Parallel()
s := &tableSheetSpec{Name: "S", Columns: []tableColumnSpec{{Name: "a"}, {Name: "b"}, {Name: "c"}}, StartCell: "A1"}
if got := tablePutFullRange(s, 0); got != "" {
t.Errorf("tablePutFullRange(_, 0) = %q, want \"\" (no invalid A1:C0)", got)
}
// Sanity check: positive totals still produce a real rectangle so we
// haven't broken the happy path.
if got := tablePutFullRange(s, 2); got != "A1:C2" {
t.Errorf("tablePutFullRange(_, 2) = %q, want A1:C2", got)
}
}
// TestTableGet_OutputRoundTripsBackIntoTablePut is the contract test: the
// output of +table-get must be a payload +table-put accepts. This catches
// dtype/format symmetry breaks early — if the reader ever emits a dtype the