Files
larksuite-cli/shortcuts/common/helpers_test.go
WJzz1 8bc4ec3fff fix(common): escape special chars in multipart form filenames (#1037)
* fix(common): escape special chars in multipart form filenames

MultipartWriter.CreateFormFile concatenated the fieldname and filename
into the Content-Disposition header without escaping, so a filename
containing a double-quote, backslash, CR, or LF produced a malformed
header. For example, uploading `report "draft" v2.pdf` via
`task +upload-attachment` made the server see `filename="report "`
(truncated at the first internal quote) and drop the rest.

Drop the custom override and let CreateFormFile be promoted from the
embedded *multipart.Writer, which applies the stdlib's quoteEscaper
(backslash and double-quote get a backslash prefix; CR and LF get
percent-encoded). The Content-Type ("application/octet-stream") and
the wrapper API are unchanged, so the existing `task +upload-attachment`
call site is unaffected -- filenames with special characters just now
round-trip correctly.

Add helpers_test.go covering plain, quoted, backslashed, mixed, and
unicode filenames. The test asserts both the on-wire encoding and a
round-trip through mime.ParseMediaType (bypassing Part.FileName, whose
filepath.Base is platform-dependent for backslash on Windows).

* test(common): cover CR/LF/CRLF in multipart filename escaping

Per code-review feedback, extend the helpers_test.go cases table with
CR, LF, and CRLF filenames so the test exercises both legs of the
stdlib's quoteEscaper:

  - backslash and double-quote use backslash escaping (quoted-pair);
    these round-trip exactly through mime.ParseMediaType.
  - CR and LF use percent encoding to prevent header injection; the
    MIME parser does not decode percent escapes, so the read-side
    filename param contains literal "%0D"/"%0A".

The cases table grows a wantParsed column so each case can declare its
expected post-parse value (same as filename for backslash-escaped chars,
percent-encoded for CR/LF).

* refactor(common): polish doc comments and regroup test cases

Two follow-up tweaks suggested by a re-read of the PR:

- helpers.go: stop naming the stdlib's internal `quoteEscaper` in the
  doc comment. Describe the observable behaviour ("escapes special
  characters") instead, so the comment stays valid if the stdlib ever
  renames or reimplements its escaping.

- helpers_test.go: rename the vague `with both` case to
  `backslash and quote`; split the table-driven cases into three
  visually-separated groups (happy path / backslash escaping /
  percent encoding) so it is obvious why two cases have a different
  wantParsed than filename.

No behaviour change; tests still pass 8/8.

* test(common): drop CR/LF filename cases that depend on Go 1.24+ stdlib

CI runs against the toolchain pinned in go.mod (1.23.0), whose
multipart/Writer.quoteEscaper escapes only backslash and double-quote.
Percent-encoding of CR and LF was added to the stdlib later, so the
three CR / LF / CRLF cases I added on review feedback fail on CI: the
literal CR/LF lands in the Content-Disposition header and the parser
reports `malformed MIME header: missing colon`.

Drop those three cases. The fix in the prior commits still covers the
real-world bug — backslash and double-quote in filenames — which is
what the original `report "draft".pdf` example demonstrates. CR or LF
in a filename is essentially never legal on any supported OS, so
leaving that edge case to a future stdlib upgrade keeps the test
stable across toolchains.

Also dropped the now-unused wantParsed column from the cases table:
with only round-trippable characters left, mime.ParseMediaType returns
the original filename byte-for-byte, so a single tc.filename comparison
suffices.

---------

Co-authored-by: Wang-Yeah623 <Wang-Yeah623@users.noreply.github.com>
2026-05-25 22:51:02 +08:00

114 lines
3.7 KiB
Go

// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package common
import (
"bytes"
"io"
"mime"
"mime/multipart"
"strings"
"testing"
)
// TestMultipartWriter_CreateFormFile_EscapesFilename verifies that filenames
// containing backslash or double-quote — the two characters every supported
// Go version's stdlib escapes via quoted-pair — are properly encoded on the
// wire and round-trip through mime.ParseMediaType.
//
// Regression test: an earlier custom CreateFormFile concatenated raw strings
// without escaping, so a filename like `report "draft".pdf` produced a
// malformed header that servers parsed as `filename="report "` (truncated at
// the first internal quote).
//
// CR / LF in filenames are not covered here: Go 1.23's stdlib does not
// percent-encode them, so they would break the header — but a CR or LF in a
// real filename is essentially never legal on any supported OS, so leaving it
// out of scope keeps the test stable across stdlib versions.
//
// Filename parameters are read via mime.ParseMediaType on the raw
// Content-Disposition header — Part.FileName runs the result through
// filepath.Base which is platform-dependent for backslash on Windows.
func TestMultipartWriter_CreateFormFile_EscapesFilename(t *testing.T) {
t.Parallel()
cases := []struct {
name string
filename string
wantEncoded string // expected escaped form embedded in the header
}{
// happy path: no characters need escaping
{"plain ASCII", "report.pdf", "report.pdf"},
{"unicode", "报告 v2.pdf", "报告 v2.pdf"},
// backslash escaping: round-trips exactly through mime.ParseMediaType
{"double quote", `report "draft" v2.pdf`, `report \"draft\" v2.pdf`},
{"backslash", `report\draft.pdf`, `report\\draft.pdf`},
{"backslash and quote", `path\to "weird" file.bin`, `path\\to \"weird\" file.bin`},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
var buf bytes.Buffer
mw := NewMultipartWriter(&buf)
w, err := mw.CreateFormFile("file", tc.filename)
if err != nil {
t.Fatalf("CreateFormFile error: %v", err)
}
if _, err := io.WriteString(w, "body-bytes"); err != nil {
t.Fatalf("write body: %v", err)
}
if err := mw.Close(); err != nil {
t.Fatalf("close writer: %v", err)
}
body := buf.String()
wantHeader := `filename="` + tc.wantEncoded + `"`
if !strings.Contains(body, wantHeader) {
t.Errorf("Content-Disposition does not contain %q\nbody:\n%s", wantHeader, body)
}
r := multipart.NewReader(strings.NewReader(body), mw.Boundary())
part, err := r.NextPart()
if err != nil {
t.Fatalf("read part: %v", err)
}
_, params, err := mime.ParseMediaType(part.Header.Get("Content-Disposition"))
if err != nil {
t.Fatalf("ParseMediaType on Content-Disposition: %v", err)
}
if got := params["filename"]; got != tc.filename {
t.Errorf("filename round-trip: got %q, want %q", got, tc.filename)
}
if got := params["name"]; got != "file" {
t.Errorf("name: got %q, want %q", got, "file")
}
})
}
}
// TestMultipartWriter_CreateFormFile_ContentType verifies that the file part
// carries the expected Content-Type for binary uploads.
func TestMultipartWriter_CreateFormFile_ContentType(t *testing.T) {
t.Parallel()
var buf bytes.Buffer
mw := NewMultipartWriter(&buf)
if _, err := mw.CreateFormFile("file", "x.bin"); err != nil {
t.Fatalf("CreateFormFile: %v", err)
}
if err := mw.Close(); err != nil {
t.Fatalf("close: %v", err)
}
r := multipart.NewReader(&buf, mw.Boundary())
part, err := r.NextPart()
if err != nil {
t.Fatalf("read part: %v", err)
}
if got := part.Header.Get("Content-Type"); got != "application/octet-stream" {
t.Errorf("Content-Type: got %q, want application/octet-stream", got)
}
}