mirror of
https://github.com/larksuite/cli.git
synced 2026-07-03 14:02:43 +08:00
fix(sheets): harden +table-put / +table-get input validation and round-trip safety
Four review-flagged correctness gaps in table I/O, all bundled because
they touch the same file:
1. --sheets accepted trailing data after the first JSON value
(json.Decoder does not surface that, unlike json.Unmarshal). A new
decoderExpectEOF helper rejects e.g. `--sheets '{...} oops'` with a
typed validation error instead of letting the leading object pass
through and surface as a confusing downstream failure.
2. +table-get with a duplicate header (e.g. `amount, amount`) used to
read back successfully — the dtypes map silently collapsed to one
entry — and only failed later on +table-put because the writer
rejects duplicate column names. Fail fast at read time with an
actionable hint to rename or pass --no-header. --no-header mode is
exempt (fallback col<N> names are always unique).
3. +table-put dry-run rendered an invalid range like A1:C0 when
header=false with rows=[]. tablePutFullRange returns "" for an
empty matrix or zero columns instead of building a degenerate
rectangle.
4. +table-get with --sheet-id and a get_workbook_structure miss (read
failure or selector mismatch) used to return a target with
name="", which then broke +table-get → +table-put round-trip (the
writer requires a non-empty sheet name). Fall back to using the id
as the name.
End-to-end verified against a real spreadsheet: trailing data, duplicate
header, and --no-header fallback all behave as advertised.
This commit is contained in:
@@ -6,7 +6,9 @@ package sheets
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -232,6 +234,21 @@ func typeToDtype(typ string) string {
|
||||
}
|
||||
}
|
||||
|
||||
// decoderExpectEOF ensures the decoder has nothing left to read after a
|
||||
// successful Decode. json.Decoder accepts trailing non-whitespace after the
|
||||
// first JSON value (unlike json.Unmarshal), so a payload like `{...} trailing`
|
||||
// would silently be treated as the leading object only. Use this after the
|
||||
// first Decode to surface the trailing data as a validation error.
|
||||
func decoderExpectEOF(dec *json.Decoder) error {
|
||||
var trailing json.RawMessage
|
||||
if err := dec.Decode(&trailing); err == nil {
|
||||
return fmt.Errorf("trailing data after JSON value") //nolint:forbidigo // intermediate error; the caller wraps it into a typed --sheets/--values validation error
|
||||
} else if !errors.Is(err, io.EOF) {
|
||||
return fmt.Errorf("trailing data after JSON value: %w", err) //nolint:forbidigo // intermediate error; the caller wraps it into a typed --sheets/--values validation error
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseTablePutPayload reads --sheets (JSON, supports @file / stdin) into a
|
||||
// validated payload. UseNumber keeps numeric cells as json.Number so large
|
||||
// integers (order IDs, etc.) survive without precision loss or scientific
|
||||
@@ -252,6 +269,12 @@ func parseTablePutPayload(runtime flagView) (*tablePayload, error) {
|
||||
if err := dec.Decode(&wire); err != nil {
|
||||
return nil, common.ValidationErrorf("--sheets: invalid JSON: %w", err)
|
||||
}
|
||||
// Reject trailing non-whitespace after the first JSON value: json.Decoder
|
||||
// accepts it silently (unlike json.Unmarshal), so e.g. `--sheets '{...} oops'`
|
||||
// would otherwise pass Validate and surface as confusing downstream errors.
|
||||
if err := decoderExpectEOF(dec); err != nil {
|
||||
return nil, common.ValidationErrorf("--sheets: %v", err).WithCause(err)
|
||||
}
|
||||
p := &tablePayload{Sheets: make([]tableSheetSpec, 0, len(wire.Sheets))}
|
||||
for i := range wire.Sheets {
|
||||
spec, err := wire.Sheets[i].normalize(i)
|
||||
@@ -579,8 +602,13 @@ func sheetAnchor(s *tableSheetSpec) (anchor string, col0, row0 int, err error) {
|
||||
}
|
||||
|
||||
// tablePutFullRange is the A1 rectangle the whole matrix (header + data)
|
||||
// occupies, for reporting in the result / dry-run.
|
||||
// occupies, for reporting in the result / dry-run. Returns "" when there is
|
||||
// nothing to write (e.g. header=false with no data rows) — the previous
|
||||
// formula would have produced an invalid trailing row like "A1:C0".
|
||||
func tablePutFullRange(s *tableSheetSpec, totalRows int) string {
|
||||
if totalRows <= 0 || len(s.Columns) == 0 {
|
||||
return ""
|
||||
}
|
||||
_, col0, row0, err := sheetAnchor(s)
|
||||
if err != nil {
|
||||
return strings.TrimSpace(s.StartCell)
|
||||
@@ -1124,8 +1152,11 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token
|
||||
// Single-sheet selector path can degrade gracefully without dimensions
|
||||
// (the probe falls back to the A1 anchor); the whole-workbook path can't
|
||||
// enumerate sheets without the structure, so it must surface the error.
|
||||
// Name doubles as id for --sheet-id so the output spec is never nameless
|
||||
// (an empty name would break +table-get → +table-put round-trip — the
|
||||
// writer requires a non-empty sheet name).
|
||||
if id != "" {
|
||||
return []tableGetSheet{{id: id}}, nil
|
||||
return []tableGetSheet{{id: id, name: id}}, nil
|
||||
}
|
||||
if name != "" {
|
||||
return []tableGetSheet{{name: name}}, nil
|
||||
@@ -1138,7 +1169,8 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token
|
||||
// Selector path: find the matching sheet to pick up its dimensions (and
|
||||
// backfill name when only --sheet-id was given). If no row matches, fall
|
||||
// back to a dimensionless target so the read still proceeds via the A1
|
||||
// anchor rather than erroring on a structure/selector mismatch.
|
||||
// anchor rather than erroring on a structure/selector mismatch — same
|
||||
// id-as-name backfill applies so the output spec round-trips.
|
||||
if id != "" || name != "" {
|
||||
for _, r := range raw {
|
||||
sid, sname, rc, cc := tableGetSheetMeta(r)
|
||||
@@ -1147,7 +1179,7 @@ func tableGetTargets(ctx context.Context, runtime *common.RuntimeContext, token
|
||||
}
|
||||
}
|
||||
if id != "" {
|
||||
return []tableGetSheet{{id: id}}, nil
|
||||
return []tableGetSheet{{id: id, name: id}}, nil
|
||||
}
|
||||
return []tableGetSheet{{name: name}}, nil
|
||||
}
|
||||
@@ -1254,10 +1286,25 @@ func readSheetAsSpec(ctx context.Context, runtime *common.RuntimeContext, token
|
||||
colTypes := make([]string, ncols)
|
||||
dtypes := make(map[string]interface{}, ncols)
|
||||
formats := map[string]interface{}{}
|
||||
// Duplicate header names break +table-get → +table-put round-trip: the dtypes
|
||||
// map (keyed by name) silently collapses to a single entry and the writer
|
||||
// later rejects the duplicate columns in --sheets validation. Fail fast with
|
||||
// an actionable hint when the source sheet actually has duplicate headers.
|
||||
// noHeader mode is exempt because tableGetColumnName falls back to positional
|
||||
// col<N> names which are always unique.
|
||||
seenNames := map[string]int{}
|
||||
for c := 0; c < ncols; c++ {
|
||||
typ, format := inferColumnType(dataRows, c)
|
||||
colTypes[c] = typ
|
||||
name := tableGetColumnName(headerRow, c, noHeader)
|
||||
if !noHeader {
|
||||
if prev, dup := seenNames[name]; dup {
|
||||
return nil, common.ValidationErrorf(
|
||||
"sheet %q: duplicate header column name %q at columns %d and %d; this would break the +table-get → +table-put round-trip. Rename the headers or pass --no-header to read by position (col1/col2/…).",
|
||||
t.name, name, prev+1, c+1)
|
||||
}
|
||||
seenNames[name] = c
|
||||
}
|
||||
columnNames[c] = name
|
||||
dtypes[name] = typeToDtype(typ)
|
||||
// Only emit a format when the column actually has one and it's not the
|
||||
|
||||
@@ -460,6 +460,11 @@ func TestTablePut_Validation(t *testing.T) {
|
||||
args: []string{"--url", testURL, "--sheets", `{"sheets":[{"name":"S","columns":["a","b"],"data":[["only-one"]]}]}`},
|
||||
want: "column count",
|
||||
},
|
||||
{
|
||||
name: "trailing JSON data after --sheets value rejected",
|
||||
args: []string{"--url", testURL, "--sheets", `{"sheets":[{"name":"S","columns":["a"],"data":[]}]} trailing`},
|
||||
want: "trailing data after JSON value",
|
||||
},
|
||||
}
|
||||
for _, tt := range cases {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
@@ -1191,6 +1196,73 @@ func TestTableGet_ExecuteRoundTrip(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestTableGet_DuplicateHeaderRejected covers a fangshuyu-flagged round-trip
|
||||
// hole: a sheet with two columns named the same (e.g. `amount, amount`) used
|
||||
// to read back successfully — with the dtypes map silently collapsing to a
|
||||
// single entry — and then fail on the immediate +table-put write because the
|
||||
// writer rejects duplicate column names. Fail fast at read time with an
|
||||
// actionable hint to rename or pass --no-header instead.
|
||||
func TestTableGet_DuplicateHeaderRejected(t *testing.T) {
|
||||
t.Parallel()
|
||||
structure := toolOutputStub(testToken, "read", `{"sheets":[{"sheet_id":"`+testSheetID+`","sheet_name":"S","row_count":200,"column_count":20,"index":0}]}`)
|
||||
region := toolOutputStub(testToken, "read", `{"current_region":"A1:B2"}`)
|
||||
cells := toolOutputStub(testToken, "read", `{"ranges":[{"cells":[`+
|
||||
`[{"value":"amount"},{"value":"amount"}],`+
|
||||
`[{"value":1},{"value":2}]`+
|
||||
`]}]}`)
|
||||
out, err := runShortcutWithStubs(t, TableGet,
|
||||
[]string{"--url", testURL, "--sheet-name", "S"}, structure, region, cells)
|
||||
if err == nil {
|
||||
t.Fatalf("expected validation error for duplicate header; got nil. out=%s", out)
|
||||
}
|
||||
combined := out + err.Error()
|
||||
if !strings.Contains(combined, "duplicate header column name") {
|
||||
t.Errorf("error should flag duplicate header; got=%s", combined)
|
||||
}
|
||||
if !strings.Contains(combined, "--no-header") {
|
||||
t.Errorf("error should hint about --no-header; got=%s", combined)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTableGet_SheetIDFallbackBackfillsName covers the fallback path: when
|
||||
// get_workbook_structure cannot enumerate the target row (e.g. selector
|
||||
// mismatch), the target name was previously left empty — which would later
|
||||
// fail the +table-put round-trip because the writer requires a non-empty
|
||||
// sheet name. The target should fall back to using the id as the name.
|
||||
func TestTableGet_SheetIDFallbackBackfillsName(t *testing.T) {
|
||||
t.Parallel()
|
||||
// Structure has a different sheet — selector mismatch triggers the fallback.
|
||||
structure := toolOutputStub(testToken, "read", `{"sheets":[{"sheet_id":"shtOther","sheet_name":"另一张","row_count":200,"column_count":20,"index":0}]}`)
|
||||
region := toolOutputStub(testToken, "read", `{"current_region":"A1:A2"}`)
|
||||
cells := toolOutputStub(testToken, "read", `{"ranges":[{"cells":[[{"value":"h"}],[{"value":"x"}]]}]}`)
|
||||
out, err := runShortcutWithStubs(t, TableGet,
|
||||
[]string{"--url", testURL, "--sheet-id", testSheetID}, structure, region, cells)
|
||||
if err != nil {
|
||||
t.Fatalf("execute failed: %v\nout=%s", err, out)
|
||||
}
|
||||
data := decodeEnvelopeData(t, out)
|
||||
s0, _ := data["sheets"].([]interface{})[0].(map[string]interface{})
|
||||
if s0["name"] != testSheetID {
|
||||
t.Errorf("fallback name = %v, want %q (id used as name)", s0["name"], testSheetID)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTablePutFullRange_EmptyMatrix covers the dry-run report for the
|
||||
// header=false + rows=[] case. Previously totalRows=0 produced an invalid
|
||||
// "A1:C0"; the helper now returns an empty range string instead.
|
||||
func TestTablePutFullRange_EmptyMatrix(t *testing.T) {
|
||||
t.Parallel()
|
||||
s := &tableSheetSpec{Name: "S", Columns: []tableColumnSpec{{Name: "a"}, {Name: "b"}, {Name: "c"}}, StartCell: "A1"}
|
||||
if got := tablePutFullRange(s, 0); got != "" {
|
||||
t.Errorf("tablePutFullRange(_, 0) = %q, want \"\" (no invalid A1:C0)", got)
|
||||
}
|
||||
// Sanity check: positive totals still produce a real rectangle so we
|
||||
// haven't broken the happy path.
|
||||
if got := tablePutFullRange(s, 2); got != "A1:C2" {
|
||||
t.Errorf("tablePutFullRange(_, 2) = %q, want A1:C2", got)
|
||||
}
|
||||
}
|
||||
|
||||
// TestTableGet_OutputRoundTripsBackIntoTablePut is the contract test: the
|
||||
// output of +table-get must be a payload +table-put accepts. This catches
|
||||
// dtype/format symmetry breaks early — if the reader ever emits a dtype the
|
||||
|
||||
Reference in New Issue
Block a user