Files
larksuite-cli/shortcuts/mail/draft/parse.go
梁硕 83dfb068ad feat: open-source lark-cli — the official CLI for Lark/Feishu
Change-Id: I113d9cdb5403cec347efe4595415e34a18b7decf
2026-03-28 10:36:25 +08:00

510 lines
14 KiB
Go

// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT
package draft
import (
"bytes"
"encoding/base64"
"fmt"
"io"
"mime"
"mime/quotedprintable"
"net/mail"
"strings"
)
// Parse turns a raw draft into a DraftSnapshot.
//
// The base64-encoded EML in raw.RawEML is decoded, split into message headers
// and body, parsed into a MIME part tree rooted at part "1", and finally
// passed to refreshSnapshot to populate the derived snapshot fields. The
// first error reported by any of these stages is returned unchanged and no
// snapshot is produced.
func Parse(raw DraftRaw) (*DraftSnapshot, error) {
	eml, err := decodeRawEML(raw.RawEML)
	if err != nil {
		return nil, err
	}

	msgHeaders, msgBody, err := parseHeaderBlock(eml)
	if err != nil {
		return nil, err
	}

	rootPart, err := parseRootPart(msgHeaders, msgBody)
	if err != nil {
		return nil, err
	}

	result := new(DraftSnapshot)
	result.DraftID = raw.DraftID
	result.Headers = msgHeaders
	result.Body = rootPart
	if err = refreshSnapshot(result); err != nil {
		return nil, err
	}
	return result, nil
}
// maxRawEMLSize caps the length of the raw (still base64-encoded) EML string.
// Base64 expands every 3 bytes into 4 characters, so 35 MB leaves headroom
// over a 25 MB decoded EML.
const maxRawEMLSize = 35 * 1024 * 1024

// decodeRawEML decodes the base64 text of a draft's EML.
//
// Surrounding whitespace is trimmed first. Empty input and input longer than
// maxRawEMLSize are rejected. The payload is then tried against the URL-safe
// and standard base64 alphabets, each with and without padding, in that
// order; the first successful decode wins and is returned after its line
// endings have been normalized. If every decoder fails an error is returned.
func decodeRawEML(raw string) ([]byte, error) {
	encoded := strings.TrimSpace(raw)
	switch size := len(encoded); {
	case size == 0:
		return nil, fmt.Errorf("draft raw EML is empty")
	case size > maxRawEMLSize:
		return nil, fmt.Errorf("draft raw EML is too large (%d bytes, max %d)", size, maxRawEMLSize)
	}

	encodings := []*base64.Encoding{
		base64.URLEncoding,
		base64.RawURLEncoding,
		base64.StdEncoding,
		base64.RawStdEncoding,
	}
	for _, enc := range encodings {
		if payload, err := enc.DecodeString(encoded); err == nil {
			return normalizeLineEndings(payload), nil
		}
	}
	return nil, fmt.Errorf("draft raw EML is not valid base64url")
}
// normalizeLineEndings returns a copy of in that uses LF as the only line
// terminator: every CRLF pair is collapsed to LF first, then every remaining
// lone CR is turned into LF.
func normalizeLineEndings(in []byte) []byte {
	var (
		crlf = []byte("\r\n")
		cr   = []byte("\r")
		lf   = []byte("\n")
	)
	return bytes.ReplaceAll(bytes.ReplaceAll(in, crlf, lf), cr, lf)
}
// parseHeaderBlock splits a message or MIME entity into its header fields and
// its body.
//
// raw is first passed through normalizeLineEndings; the first blank line
// ("\n\n") marks the end of the header section. Within that section:
//   - lines that are empty or whitespace-only are ignored;
//   - a line starting with a space or tab is a folded continuation and is
//     appended, separated by a single space, to the value of the preceding
//     header;
//   - a line without a colon is skipped rather than treated as an error,
//     and so are the continuation lines that directly follow it, so that a
//     stray comment or separator line cannot leak into the value of the
//     header that happens to precede it.
//
// It returns the headers in their original order, the bytes that follow the
// separator (a slice of the normalized data), and an error when no
// header/body separator is present.
func parseHeaderBlock(raw []byte) ([]Header, []byte, error) {
	raw = normalizeLineEndings(raw)
	sep := bytes.Index(raw, []byte("\n\n"))
	if sep < 0 {
		return nil, nil, fmt.Errorf("invalid EML: missing header/body separator")
	}

	headerLines := strings.Split(string(raw[:sep]), "\n")
	headers := make([]Header, 0, len(headerLines))
	// droppedPrev is true while the most recent field line was skipped for
	// lacking a colon; its folded continuation lines must be skipped too.
	droppedPrev := false
	for _, line := range headerLines {
		if strings.TrimSpace(line) == "" {
			continue
		}
		folded := strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t")
		if folded && droppedPrev {
			// Continuation of a line that was itself discarded.
			continue
		}
		if folded && len(headers) > 0 {
			headers[len(headers)-1].Value += " " + strings.TrimSpace(line)
			continue
		}
		name, value, ok := strings.Cut(line, ":")
		if !ok {
			// Skip lines without a colon rather than failing. Some email
			// systems insert comment or separator lines in the header area.
			droppedPrev = true
			continue
		}
		droppedPrev = false
		headers = append(headers, Header{
			Name:  strings.TrimSpace(name),
			Value: strings.TrimSpace(value),
		})
	}
	return headers, raw[sep+2:], nil
}
// parseRootPart builds the root MIME part (part ID "1") of a message.
//
// Only the body-related message headers (as selected by extractBodyHeaders)
// describe the root entity. When there are none, the body is taken verbatim
// as a 7bit UTF-8 text/plain part. Otherwise the part is parsed by parsePart,
// with a raw entity rebuilt from those headers minus MIME-Version.
func parseRootPart(messageHeaders []Header, body []byte) (*Part, error) {
	entityHeaders := extractBodyHeaders(messageHeaders)
	if len(entityHeaders) > 0 {
		raw := buildRawEntity(filterRawEntityHeaders(entityHeaders), body)
		return parsePart(entityHeaders, body, "1", raw, 0)
	}

	// No Content-*/MIME-Version headers at all: treat as plain text.
	return &Part{
		PartID:           "1",
		Headers:          []Header{},
		MediaType:        "text/plain",
		MediaParams:      map[string]string{"charset": "UTF-8"},
		TransferEncoding: "7bit",
		Body:             body,
		RawEntity:        append([]byte{}, body...),
	}, nil
}
// maxMIMEDepth is the deepest multipart nesting level parsePart accepts.
const maxMIMEDepth = 50

// parsePart builds a Part from an entity's headers and (still
// transfer-encoded) body.
//
// headers and rawEntity are copied into the part. depth is the current
// nesting level; exceeding maxMIMEDepth is an error. Parsing is lenient:
//   - without a Content-Type the part is text/plain with charset UTF-8;
//   - an unparsable Content-Type makes the part application/octet-stream and
//     sets EncodingProblem;
//   - an unparsable Content-Disposition is ignored.
//
// In every case the original header stays in part.Headers. Multipart
// entities are split into children via parseMultipartChildren; a multipart
// without a boundary parameter is an error, while one whose boundary never
// occurs in the body is reclassified as text/plain with EncodingProblem set.
// Leaf bodies are decoded with decodePartBody.
func parsePart(headers []Header, body []byte, partID string, rawEntity []byte, depth int) (*Part, error) {
	if depth > maxMIMEDepth {
		return nil, fmt.Errorf("MIME nesting too deep (max %d levels)", maxMIMEDepth)
	}

	p := &Part{
		PartID:                partID,
		Headers:               append([]Header{}, headers...),
		MediaType:             "text/plain",
		MediaParams:           map[string]string{},
		ContentDispositionArg: map[string]string{},
		RawEntity:             append([]byte{}, rawEntity...),
		ContentID:             strings.Trim(strings.TrimSpace(headerValue(headers, "Content-ID")), "<>"),
		TransferEncoding:      strings.ToLower(strings.TrimSpace(headerValue(headers, "Content-Transfer-Encoding"))),
	}

	if contentType := headerValue(headers, "Content-Type"); contentType == "" {
		p.MediaParams["charset"] = "UTF-8"
	} else if mt, params, err := mime.ParseMediaType(contentType); err == nil {
		p.MediaType = strings.ToLower(mt)
		p.MediaParams = lowerCaseKeys(params)
	} else {
		// Fallback: expose the part as opaque binary so it stays accessible
		// and can round-trip through RawEntity. The original Content-Type
		// header is still kept in p.Headers.
		p.MediaType = "application/octet-stream"
		p.EncodingProblem = true
	}

	if rawDisp := headerValue(headers, "Content-Disposition"); rawDisp != "" {
		// A malformed disposition is ignored on purpose; the header itself
		// remains available in p.Headers.
		if kind, args, err := mime.ParseMediaType(rawDisp); err == nil {
			p.ContentDisposition = strings.ToLower(kind)
			p.ContentDispositionArg = lowerCaseKeys(args)
		}
	}

	if !strings.HasPrefix(p.MediaType, "multipart/") {
		content, bad := decodePartBody(body, p.TransferEncoding, p.MediaType, p.MediaParams)
		p.Body = content
		if bad {
			p.EncodingProblem = true
		}
		return p, nil
	}

	boundary := p.MediaParams["boundary"]
	if boundary == "" {
		return nil, fmt.Errorf("multipart part %s missing boundary", partID)
	}
	children, preamble, epilogue, err := parseMultipartChildren(body, boundary, partID, depth)
	if err != nil {
		return nil, err
	}
	if len(children) == 0 {
		// The declared boundary never appears in the body. Reclassify as
		// text instead of returning a multipart with no children
		// (following mail-parser's approach per Postel's law).
		p.MediaType = "text/plain"
		p.MediaParams = map[string]string{"charset": "UTF-8"}
		p.Body = body
		p.EncodingProblem = true
		return p, nil
	}

	p.Children, p.Preamble, p.Epilogue = children, preamble, epilogue
	return p, nil
}
// parseMultipartChildren splits a multipart body on its boundary lines and
// parses every enclosed entity into a child Part.
//
// A line counts as a delimiter when, after trimming surrounding whitespace,
// it equals "--"+boundary; it is the closing delimiter when it equals
// "--"+boundary+"--". Bytes before the first delimiter are returned as the
// preamble, bytes after the closing delimiter as the epilogue. Children get
// IDs of the form parentPartID.N (N starting at 1) and are parsed one level
// deeper than depth. Entities that are empty after trimming trailing
// whitespace are dropped. A missing closing delimiter is tolerated: whatever
// was collected for the last entity is still parsed.
//
// It returns the children, preamble, epilogue, and the first error raised
// while parsing a child.
func parseMultipartChildren(body []byte, boundary, parentPartID string, depth int) ([]*Part, []byte, []byte, error) {
	delimiter := "--" + boundary
	terminator := "--" + boundary + "--"

	var (
		parts    []*Part
		preamble bytes.Buffer
		epilogue bytes.Buffer
		pending  bytes.Buffer // lines of the entity currently being collected
		open     bool         // a delimiter has been seen
		closed   bool         // the closing delimiter has been seen
		seq      int          // number of children emitted so far
	)

	// emit parses the entity collected in pending (if any) and appends it to
	// parts, leaving pending empty.
	emit := func() error {
		// Take a private copy before Reset: pending.Bytes() aliases the
		// buffer's internal array, which is overwritten once the next
		// entity is written into the buffer.
		entity := append([]byte{}, bytes.TrimRight(pending.Bytes(), " \t\r\n")...)
		pending.Reset()
		if len(entity) == 0 {
			return nil
		}

		// parseHeaderBlock needs a "\n\n" header/body separator.
		switch {
		case entity[0] == '\n':
			// The leading "\n" is the blank separator line of an entity
			// without headers; prefix another "\n" so the separator is
			// found at offset 0 and the header list comes back empty.
			entity = append([]byte{'\n'}, entity...)
		case !bytes.Contains(entity, []byte("\n\n")):
			// Headers only: the trim above removed the separator of an
			// entity with an empty body. Put it back.
			entity = append(entity, '\n', '\n')
		}

		seq++
		childID := fmt.Sprintf("%s.%d", parentPartID, seq)
		childHeaders, childBody, err := parseHeaderBlock(entity)
		if err != nil {
			return err
		}
		child, err := parsePart(childHeaders, childBody, childID, entity, depth+1)
		if err != nil {
			return err
		}
		parts = append(parts, child)
		return nil
	}

	for _, line := range bytes.SplitAfter(body, []byte("\n")) {
		if closed {
			epilogue.Write(line)
			continue
		}

		marker := strings.TrimSpace(string(line))
		switch {
		case marker == delimiter:
			// A delimiter ends the entity in progress (if any) and starts
			// the next one.
			if open {
				if err := emit(); err != nil {
					return nil, nil, nil, err
				}
			}
			open = true
		case marker == terminator:
			if open {
				if err := emit(); err != nil {
					return nil, nil, nil, err
				}
			}
			closed = true
		case open:
			pending.Write(line)
		default:
			preamble.Write(line)
		}
	}

	// Handles bodies whose closing delimiter is missing; a no-op when the
	// pending buffer is already empty.
	if open {
		if err := emit(); err != nil {
			return nil, nil, nil, err
		}
	}
	return parts, preamble.Bytes(), epilogue.Bytes(), nil
}
// decodePartBody undoes the content transfer encoding of a leaf part and,
// for textual media types, its charset.
//
// cte is matched case-insensitively after trimming. "base64" (with or without
// padding, whitespace ignored) and "quoted-printable" are decoded; every other
// value, including the empty string, passes body through untouched. If the
// transfer decoding fails, the raw body is kept instead. When
// isTextualMediaType accepts mediaType, the result is further converted with
// decodeTextCharset using mediaParams["charset"]; if that conversion fails the
// transfer-decoded bytes are returned as they are.
//
// The boolean result reports whether any decoding step failed, in which case
// the returned bytes are a best-effort fallback.
func decodePartBody(body []byte, cte, mediaType string, mediaParams map[string]string) ([]byte, bool) {
	// Identity encodings ("", 7bit, 8bit, binary) and unknown encodings keep
	// the body as it is.
	payload, failed := body, false

	switch strings.ToLower(strings.TrimSpace(cte)) {
	case "base64":
		compact := strings.Join(strings.Fields(string(body)), "")
		out, err := base64.StdEncoding.DecodeString(compact)
		if err != nil {
			// Some implementations omit the padding; retry without it.
			out, err = base64.RawStdEncoding.DecodeString(compact)
		}
		if err == nil {
			payload = out
		} else {
			// Keep the raw bytes so the part stays accessible and can
			// round-trip through RawEntity.
			failed = true
		}
	case "quoted-printable":
		out, err := io.ReadAll(quotedprintable.NewReader(bytes.NewReader(body)))
		if err == nil {
			payload = out
		} else {
			// Same fallback as for base64: keep the raw bytes.
			failed = true
		}
	}

	if !isTextualMediaType(mediaType) {
		return payload, failed
	}

	text, err := decodeTextCharset(payload, mediaParams["charset"])
	if err != nil {
		// Unsupported or malformed charsets must not break draft parsing.
		// Keep the transfer-decoded bytes so untouched subtrees can still
		// round-trip raw.
		return payload, true
	}
	return text, failed
}
// extractBodyHeaders returns, in their original order, the headers whose
// names isBodyHeader accepts. The result is always a fresh, non-nil slice.
func extractBodyHeaders(headers []Header) []Header {
	kept := make([]Header, 0, len(headers))
	for i := range headers {
		if !isBodyHeader(headers[i].Name) {
			continue
		}
		kept = append(kept, headers[i])
	}
	return kept
}
// buildRawEntity serializes headers and body into a raw entity: one
// "Name: Value" line per header, a blank line, then the body. Lines are
// terminated with "\n". With no headers the result is just a copy of body,
// without a leading blank line.
func buildRawEntity(headers []Header, body []byte) []byte {
	if len(headers) == 0 {
		return append([]byte{}, body...)
	}

	var entity bytes.Buffer
	for i := range headers {
		entity.WriteString(headers[i].Name + ": " + headers[i].Value + "\n")
	}
	entity.WriteByte('\n') // header/body separator
	entity.Write(body)
	return entity.Bytes()
}
// filterRawEntityHeaders returns a copy of headers without any MIME-Version
// header (matched case-insensitively), preserving the order of the rest.
func filterRawEntityHeaders(headers []Header) []Header {
	kept := make([]Header, 0, len(headers))
	for i := range headers {
		if h := headers[i]; !strings.EqualFold(h.Name, "MIME-Version") {
			kept = append(kept, h)
		}
	}
	return kept
}
// refreshSnapshot recomputes the fields of snapshot that are derived from its
// headers and part tree: subject, message/thread identifiers, the parsed
// address lists, the part IDs of the whole tree, and the IDs of the primary
// text/plain and text/html parts (empty when no such part exists).
//
// It currently always returns nil.
func refreshSnapshot(snapshot *DraftSnapshot) error {
	header := func(name string) string {
		return headerValue(snapshot.Headers, name)
	}
	// Address headers are parsed leniently: a malformed value (non-standard
	// display names, semicolons, broken encoding) yields no addresses rather
	// than an error, so that the draft can still be opened. The raw header
	// values remain in snapshot.Headers for round-trip serialization.
	addresses := func(name string) []Address {
		list, _ := parseAddressHeader(header(name))
		return list
	}

	snapshot.Subject = decodeHeaderValue(header("Subject"))
	snapshot.MessageID = strings.TrimSpace(header("Message-ID"))
	snapshot.InReplyTo = strings.TrimSpace(header("In-Reply-To"))
	snapshot.References = strings.TrimSpace(header("References"))

	snapshot.From = addresses("From")
	snapshot.To = addresses("To")
	snapshot.Cc = addresses("Cc")
	snapshot.Bcc = addresses("Bcc")
	snapshot.ReplyTo = addresses("Reply-To")

	reindexParts(snapshot.Body, "1")

	snapshot.PrimaryTextPartID = ""
	if plain := findPrimaryBodyPart(snapshot.Body, "text/plain"); plain != nil {
		snapshot.PrimaryTextPartID = plain.PartID
	}
	snapshot.PrimaryHTMLPartID = ""
	if html := findPrimaryBodyPart(snapshot.Body, "text/html"); html != nil {
		snapshot.PrimaryHTMLPartID = html.PartID
	}

	// Inline CID consistency is deliberately NOT validated here: broken CID
	// references should not prevent opening the draft editor. Project()
	// already reports missing CIDs as warnings in DraftProjection.Warnings.
	return nil
}
// parseAddressHeader parses an address-list header value (From, To, Cc, ...)
// into Address values, decoding RFC 2047 encoded words in display names.
//
// A blank value yields (nil, nil). A value that net/mail cannot parse yields
// the parser's error and no addresses.
func parseAddressHeader(value string) ([]Address, error) {
	trimmed := strings.TrimSpace(value)
	if trimmed == "" {
		return nil, nil
	}

	p := mail.AddressParser{WordDecoder: new(mime.WordDecoder)}
	parsed, err := p.ParseList(trimmed)
	if err != nil {
		return nil, err
	}

	result := make([]Address, len(parsed))
	for i, entry := range parsed {
		result[i] = Address{
			Name:    entry.Name,
			Address: strings.TrimSpace(entry.Address),
		}
	}
	return result, nil
}
// lowerCaseKeys returns a new map holding the entries of in with every key
// lower-cased; values are left untouched. The input is not modified.
//
// Note that a nil or empty input yields nil, not an empty map, so the result
// must not be written to without a nil check.
func lowerCaseKeys(in map[string]string) map[string]string {
	size := len(in)
	if size == 0 {
		return nil
	}
	lowered := make(map[string]string, size)
	for key := range in {
		lowered[strings.ToLower(key)] = in[key]
	}
	return lowered
}
// headerValue returns the value of the first header whose name equals name,
// compared case-insensitively, or "" when there is no such header.
func headerValue(headers []Header, name string) string {
	for i := range headers {
		if strings.EqualFold(headers[i].Name, name) {
			return headers[i].Value
		}
	}
	return ""
}
// isBodyHeader reports whether name denotes a header that describes the body
// entity rather than the message: MIME-Version or any Content-* header. The
// comparison ignores case and surrounding whitespace.
func isBodyHeader(name string) bool {
	switch normalized := strings.ToLower(strings.TrimSpace(name)); {
	case normalized == "mime-version":
		return true
	default:
		return strings.HasPrefix(normalized, "content-")
	}
}
// reindexParts assigns partID to part and renumbers its whole subtree so that
// the N-th child (counting from 1) of a part with ID X gets the ID "X.N".
// A nil part is ignored.
func reindexParts(part *Part, partID string) {
	if part != nil {
		part.PartID = partID
		for n := range part.Children {
			reindexParts(part.Children[n], fmt.Sprintf("%s.%d", partID, n+1))
		}
	}
}
// findPrimaryBodyPart walks the tree under root depth-first and returns the
// non-multipart part that bodyCandidateScore rates highest for mediaType.
// Ties go to the part encountered first. It returns nil when no part
// qualifies.
func findPrimaryBodyPart(root *Part, mediaType string) *Part {
	var (
		winner   *Part
		topScore = -1
		visit    func(node *Part, lineage []string)
	)
	// lineage holds the media types of the multipart ancestors of node,
	// outermost first.
	visit = func(node *Part, lineage []string) {
		switch {
		case node == nil:
			return
		case node.IsMultipart():
			// Copy so that sibling subtrees never share a backing array.
			extended := make([]string, len(lineage), len(lineage)+1)
			copy(extended, lineage)
			extended = append(extended, node.MediaType)
			for _, child := range node.Children {
				visit(child, extended)
			}
		default:
			if score, ok := bodyCandidateScore(node, lineage, mediaType); ok && score > topScore {
				winner, topScore = node, score
			}
		}
	}
	visit(root, nil)
	return winner
}
// bodyCandidateScore rates how well part qualifies as the primary body of the
// given mediaType; the boolean is false when it does not qualify at all.
//
// A part is disqualified when it is nil, has a different media type
// (compared case-insensitively), carries an "attachment" or "inline"
// disposition, or sits anywhere below a multipart/signed or
// multipart/encrypted ancestor. Qualifying parts start at 1 and gain 10 for
// every multipart/alternative ancestor and, when mediaType is exactly
// "text/html", 5 for every multipart/related ancestor.
func bodyCandidateScore(part *Part, ancestors []string, mediaType string) (int, bool) {
	if part == nil {
		return 0, false
	}
	if !strings.EqualFold(part.MediaType, mediaType) {
		return 0, false
	}
	if disp := strings.ToLower(part.ContentDisposition); disp == "attachment" || disp == "inline" {
		return 0, false
	}

	wantsHTML := mediaType == "text/html"
	total := 1
	for i := range ancestors {
		switch kind := ancestors[i]; {
		case kind == "multipart/signed" || kind == "multipart/encrypted":
			return 0, false
		case kind == "multipart/alternative":
			total += 10
		case kind == "multipart/related" && wantsHTML:
			total += 5
		}
	}
	return total, true
}