Compare commits

...

1 Commits

Author SHA1 Message Date
xukun
2a51a2427e feat(mail): add strict local validation for message_ids in +messages shortcut
Add validateMessageIDs function that checks each message ID after
comma splitting to reject clearly illegal input before it reaches the
batch_get API endpoint. Rejects empty/whitespace IDs, natural language,
JSON array strings, and colon-separated IDs. Surrounding quotes are
stripped before validation, so a quote-wrapped value is judged by its
inner content. Add comprehensive unit tests.

sprint: S1
2026-06-01 12:04:49 +08:00
2 changed files with 338 additions and 1 deletions

View File

@@ -5,6 +5,9 @@ package mail
import (
"context"
"fmt"
"regexp"
"strings"
"github.com/larksuite/cli/internal/output"
"github.com/larksuite/cli/shortcuts/common"
@@ -35,7 +38,11 @@ var MailMessages = common.Shortcut{
{Name: "print-output-schema", Type: "bool", Desc: "Print output field reference (run this first to learn field names before parsing output)"},
},
Validate: func(ctx context.Context, runtime *common.RuntimeContext) error {
return validateBotMailboxNotMe(runtime)
if err := validateBotMailboxNotMe(runtime); err != nil {
return err
}
messageIDs := splitByComma(runtime.Str("message-ids"))
return validateMessageIDs(messageIDs)
},
DryRun: func(ctx context.Context, runtime *common.RuntimeContext) *common.DryRunAPI {
mailboxID := resolveMailboxID(runtime)
@@ -86,3 +93,95 @@ var MailMessages = common.Shortcut{
return nil
},
}
// messageIDPattern is the final shape check applied to a single, already
// cleaned message ID. It requires at least one character and forbids exactly
// three things anywhere in the string: whitespace (the \s class), square
// brackets, and colons. Every other character — including quotes, braces and
// parentheses — is accepted by this pattern.
//
// Rationale from the original author: message IDs from the Lark mail API are
// opaque strings (typically hex or alphanumeric), so characters that suggest
// structural content (brackets, colons) are rejected.
var messageIDPattern = regexp.MustCompile(`^[^\s\[\]:]+$`)
// commonEnglishWords lists words whose appearance as an entire ID suggests the
// caller passed natural language instead of an opaque message ID. It combines
// general English function words with mail-related terms ("message", "email",
// "reply", ...).
//
// validateSingleMessageID lowercases the candidate ID and compares it for
// exact equality against each entry, so every entry here must be lowercase and
// substrings are never matched (an ID that merely contains "the" is unaffected).
var commonEnglishWords = []string{
"the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
"her", "was", "one", "our", "out", "get", "has", "how", "its", "may",
"new", "now", "old", "see", "way", "who", "did", "let", "say",
"she", "too", "use", "from", "with", "this", "that", "have",
"will", "been", "they", "what", "about", "would", "could", "their",
"which", "there", "these", "other", "should", "please", "message",
"email", "subject", "fetch", "read", "list", "send", "reply", "forward",
}
// validateMessageIDs runs validateSingleMessageID over every element of ids
// (the comma-split value of --message-ids) and aggregates the failures so that
// obviously malformed input is rejected locally instead of being sent to the
// batch_get API.
//
// It returns nil when every element is acceptable, which includes an empty or
// nil slice; per the original note, the empty case is left to Execute.
// Otherwise it returns a single validation error whose message joins the
// per-element reasons with "; ", in input order.
func validateMessageIDs(ids []string) error {
	var reasons []string
	for i := range ids {
		reason := validateSingleMessageID(ids[i])
		if reason == "" {
			continue
		}
		reasons = append(reasons, reason)
	}

	// Covers both "no input" and "all input valid".
	if len(reasons) == 0 {
		return nil
	}
	return output.ErrValidation("invalid --message-ids: %s", strings.Join(reasons, "; "))
}
// validateSingleMessageID checks one comma-split element of --message-ids. It
// returns "" when the element is acceptable, or a human-readable reason
// (prefixed with the %q-quoted raw input) when it is not.
//
// Cleaning: surrounding whitespace is trimmed, then one pair of matching
// single or double quotes is removed and the remainder is trimmed again. Only
// the local copy is cleaned; the caller's string is left as it was.
//
// Checks run in this order and the first failure wins:
//  1. empty after cleaning
//  2. starts with "[" and ends with "]" (a JSON array passed as one ID)
//  3. contains ":"
//  4. contains a space character
//  5. equals an entry of commonEnglishWords, ignoring case
//  6. does not match messageIDPattern (other whitespace, stray square bracket)
//  7. still starts or ends with a quote character after cleaning
//
// Check 7 catches the pieces left when a quoted list such as `"id1,id2"` is
// split on commas (`"id1` and `id2"`), as well as doubly quoted values. Quote
// characters in the middle of an ID remain allowed.
func validateSingleMessageID(raw string) string {
	const quoteChars = `"'`

	id := strings.TrimSpace(raw)

	// Strip one pair of matching surrounding quotes (single or double).
	if n := len(id); n >= 2 && id[0] == id[n-1] && strings.IndexByte(quoteChars, id[0]) >= 0 {
		id = strings.TrimSpace(id[1 : n-1])
	}

	switch {
	case id == "":
		return fmt.Sprintf("%q: empty or whitespace-only", raw)
	case strings.HasPrefix(id, "[") && strings.HasSuffix(id, "]"):
		// e.g. "[\"id1\",\"id2\"]" passed as a single element.
		return fmt.Sprintf("%q: looks like a JSON array, not a single message ID", raw)
	case strings.Contains(id, ":"):
		// e.g. "id1:id2:id3".
		return fmt.Sprintf("%q: contains colon separators (multiple IDs concatenated)", raw)
	case strings.Contains(id, " "):
		return fmt.Sprintf("%q: contains spaces (expected opaque identifier)", raw)
	}

	// Whole-ID, case-insensitive match against the natural-language word list.
	lower := strings.ToLower(id)
	for _, word := range commonEnglishWords {
		if lower == word {
			return fmt.Sprintf("%q: looks like natural language, not a message ID", raw)
		}
	}

	// Final pattern check: non-empty, no whitespace, no brackets, no colons.
	if !messageIDPattern.MatchString(id) {
		return fmt.Sprintf("%q: contains invalid characters (spaces, brackets, or colons)", raw)
	}

	// id is non-empty here, so indexing its first and last byte is safe. This
	// check runs last so that inputs already rejected above keep their reason.
	if strings.IndexByte(quoteChars, id[0]) >= 0 || strings.IndexByte(quoteChars, id[len(id)-1]) >= 0 {
		return fmt.Sprintf("%q: has an unmatched or nested quote at its start or end", raw)
	}
	return ""
}

View File

@@ -0,0 +1,238 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package mail
import (
"strings"
"testing"
)
// TestValidateMessageIDs exercises validateMessageIDs with table-driven cases.
//
// Each case states whether an error is expected. For failing cases,
// wantSubstrs lists fragments that must ALL appear in the error text, and
// wantAbsent lists fragments that must NOT appear. This lets the aggregation
// cases verify that every invalid element is reported and that valid elements
// are left out of the message, rather than checking a single fragment.
func TestValidateMessageIDs(t *testing.T) {
	cases := []struct {
		name        string
		ids         []string
		wantErr     bool
		wantSubstrs []string
		wantAbsent  []string
	}{
		{name: "empty list passes", ids: []string{}},
		{name: "valid single ID passes", ids: []string{"msg_abc123"}},
		{name: "valid multiple IDs pass", ids: []string{"msg_abc123", "msg_def456", "msg_ghi789"}},
		{name: "valid hex ID passes", ids: []string{"a1b2c3d4e5f6"}},
		{name: "valid ID with underscores and dashes passes", ids: []string{"msg_abc-123_def"}},
		{
			name:        "empty string rejected",
			ids:         []string{""},
			wantErr:     true,
			wantSubstrs: []string{"empty or whitespace-only"},
		},
		{
			name:        "whitespace-only rejected",
			ids:         []string{" "},
			wantErr:     true,
			wantSubstrs: []string{"empty or whitespace-only"},
		},
		{
			name:        "natural language word rejected",
			ids:         []string{"message"},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
		},
		{
			name:        "natural language phrase rejected",
			ids:         []string{"please read this email"},
			wantErr:     true,
			wantSubstrs: []string{"contains spaces"},
		},
		{
			name:        "JSON array string rejected",
			ids:         []string{`["id1","id2"]`},
			wantErr:     true,
			wantSubstrs: []string{"JSON array"},
		},
		{
			name:        "JSON array string with spaces rejected",
			ids:         []string{`[ "id1", "id2" ]`},
			wantErr:     true,
			wantSubstrs: []string{"JSON array"},
		},
		{name: "double-quoted valid ID passes after quote stripping", ids: []string{`"msg_abc123"`}},
		{name: "single-quoted valid ID passes after quote stripping", ids: []string{`'msg_abc123'`}},
		{
			name:        "double-quoted natural language rejected after stripping",
			ids:         []string{`"message"`},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
		},
		{
			name:        "single-quoted natural language rejected after stripping",
			ids:         []string{`'email'`},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
		},
		{name: "ID that just looks like quotes but isn't still valid", ids: []string{"msg_abc'123"}},
		{
			name:        "colon-separated IDs rejected",
			ids:         []string{"id1:id2:id3"},
			wantErr:     true,
			wantSubstrs: []string{"colon separators"},
		},
		{
			// Only the invalid element may be mentioned in the error.
			name:        "mixed valid and invalid reports invalid ones",
			ids:         []string{"msg_valid123", "the", "msg_another456"},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
			wantAbsent:  []string{"msg_valid123", "msg_another456"},
		},
		{
			name:        "double-quoted empty rejected",
			ids:         []string{`""`},
			wantErr:     true,
			wantSubstrs: []string{"empty or whitespace-only"},
		},
		{
			name:        "single-quoted empty rejected",
			ids:         []string{`''`},
			wantErr:     true,
			wantSubstrs: []string{"empty or whitespace-only"},
		},
		{
			name:        "natural language word 'email' rejected",
			ids:         []string{"email"},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
		},
		{
			name:        "natural language word 'subject' rejected",
			ids:         []string{"subject"},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
		},
		{
			name:        "natural language word 'fetch' rejected",
			ids:         []string{"fetch"},
			wantErr:     true,
			wantSubstrs: []string{"natural language"},
		},
		{name: "numeric ID passes", ids: []string{"1234567890"}},
		{name: "ID with uppercase passes", ids: []string{"MSG_ABC123DEF"}},
		{name: "realistic Lark message ID passes", ids: []string{"gmxxxxxxxxxxxxxx"}},
		{
			// Reasons of different kinds must all be present, not just the first.
			name:        "multiple invalid IDs all reported",
			ids:         []string{"the", "email", "id1:id2"},
			wantErr:     true,
			wantSubstrs: []string{"natural language", "email", "colon separators"},
		},
		{
			name:        "double-quoted whitespace-only rejected",
			ids:         []string{`" "`},
			wantErr:     true,
			wantSubstrs: []string{"empty or whitespace-only"},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			err := validateMessageIDs(tc.ids)
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Fatalf("validateMessageIDs(%v) error = %v, wantErr %v", tc.ids, err, tc.wantErr)
			}
			if err == nil {
				return
			}

			msg := err.Error()
			for _, want := range tc.wantSubstrs {
				if !strings.Contains(msg, want) {
					t.Errorf("validateMessageIDs(%v) error = %v, want substr %q", tc.ids, err, want)
				}
			}
			for _, banned := range tc.wantAbsent {
				if strings.Contains(msg, banned) {
					t.Errorf("validateMessageIDs(%v) error = %v, must not mention %q", tc.ids, err, banned)
				}
			}
		})
	}
}
// TestValidateSingleMessageID checks the accept/reject decision of
// validateSingleMessageID for individual raw inputs. Only the decision is
// asserted (empty reason means accepted); the wording of the rejection reason
// is not examined here.
func TestValidateSingleMessageID(t *testing.T) {
	type scenario struct {
		label  string
		input  string
		accept bool
	}

	scenarios := []scenario{
		{"valid hex ID", "a1b2c3d4", true},
		{"valid prefixed ID", "msg_abc123", true},
		{"empty string", "", false},
		{"whitespace only", " ", false},
		{"tab only", "\t", false},
		{"natural language phrase", "please read my email", false},
		{"JSON array", `["id1","id2"]`, false},
		{"colon separated", "id1:id2:id3", false},
		{"double quoted valid ID passes after strip", `"msg_abc"`, true},
		{"single quoted valid ID passes after strip", `'msg_abc'`, true},
		{"double quoted natural language rejected after strip", `"message"`, false},
		{"single quoted natural language rejected after strip", `'email'`, false},
		{"word: message", "message", false},
		{"word: email", "email", false},
		{"word: subject", "subject", false},
		{"word: please", "please", false},
		{"word: THE", "THE", false},
		{"numeric ID", "1234567890", true},
		{"ID with dash", "msg-abc-123", true},
		{"ID with dot", "msg.abc.123", true},
		{"ID with underscore", "msg_abc_123", true},
		{"double quoted empty", `""`, false},
		{"single quoted empty", `''`, false},
		{"double quoted whitespace", `" "`, false},
		{"double quoted colon-separated rejected after strip", `"a:b:c"`, false},
		{"double quoted JSON array rejected", `"[]"`, false},
	}

	for i := range scenarios {
		sc := scenarios[i]
		t.Run(sc.label, func(t *testing.T) {
			got := validateSingleMessageID(sc.input)
			rejected := got != ""

			switch {
			case sc.accept && rejected:
				t.Errorf("validateSingleMessageID(%q) = %q, want empty (valid)", sc.input, got)
			case !sc.accept && !rejected:
				t.Errorf("validateSingleMessageID(%q) = empty, want rejection reason", sc.input)
			}
		})
	}
}