mirror of
https://github.com/larksuite/cli.git
synced 2026-07-04 23:15:25 +08:00
510 lines
14 KiB
Go
510 lines
14 KiB
Go
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package draft
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
"fmt"
|
|
"io"
|
|
"mime"
|
|
"mime/quotedprintable"
|
|
"net/mail"
|
|
"strings"
|
|
)
|
|
|
|
func Parse(raw DraftRaw) (*DraftSnapshot, error) {
|
|
decoded, err := decodeRawEML(raw.RawEML)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
headers, body, err := parseHeaderBlock(decoded)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
root, err := parseRootPart(headers, body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
snapshot := &DraftSnapshot{
|
|
DraftID: raw.DraftID,
|
|
Headers: headers,
|
|
Body: root,
|
|
}
|
|
if err := refreshSnapshot(snapshot); err != nil {
|
|
return nil, err
|
|
}
|
|
return snapshot, nil
|
|
}
|
|
|
|
// maxRawEMLSize caps the length of the raw (still base64-encoded) EML string.
// Base64 expands every 3 bytes into 4 characters, so 35 MiB leaves headroom
// for a 25 MiB decoded message.
const maxRawEMLSize = 35 << 20
|
|
|
|
func decodeRawEML(raw string) ([]byte, error) {
|
|
raw = strings.TrimSpace(raw)
|
|
if raw == "" {
|
|
return nil, fmt.Errorf("draft raw EML is empty")
|
|
}
|
|
if len(raw) > maxRawEMLSize {
|
|
return nil, fmt.Errorf("draft raw EML is too large (%d bytes, max %d)", len(raw), maxRawEMLSize)
|
|
}
|
|
decoders := []func(string) ([]byte, error){
|
|
base64.URLEncoding.DecodeString,
|
|
base64.RawURLEncoding.DecodeString,
|
|
base64.StdEncoding.DecodeString,
|
|
base64.RawStdEncoding.DecodeString,
|
|
}
|
|
for _, decode := range decoders {
|
|
decoded, err := decode(raw)
|
|
if err == nil {
|
|
return normalizeLineEndings(decoded), nil
|
|
}
|
|
}
|
|
return nil, fmt.Errorf("draft raw EML is not valid base64url")
|
|
}
|
|
|
|
// normalizeLineEndings returns a copy of in where every CRLF pair and every
// remaining lone CR has been replaced by a single LF.
func normalizeLineEndings(in []byte) []byte {
	lf := []byte{'\n'}
	// CRLF must be collapsed first so that its CR is not turned into an
	// extra LF by the second pass.
	withoutCRLF := bytes.ReplaceAll(in, []byte{'\r', '\n'}, lf)
	return bytes.ReplaceAll(withoutCRLF, []byte{'\r'}, lf)
}
|
|
|
|
func parseHeaderBlock(raw []byte) ([]Header, []byte, error) {
|
|
raw = normalizeLineEndings(raw)
|
|
sep := bytes.Index(raw, []byte("\n\n"))
|
|
if sep < 0 {
|
|
return nil, nil, fmt.Errorf("invalid EML: missing header/body separator")
|
|
}
|
|
headerLines := strings.Split(string(raw[:sep]), "\n")
|
|
headers := make([]Header, 0, len(headerLines))
|
|
for _, line := range headerLines {
|
|
if strings.TrimSpace(line) == "" {
|
|
continue
|
|
}
|
|
if (strings.HasPrefix(line, " ") || strings.HasPrefix(line, "\t")) && len(headers) > 0 {
|
|
headers[len(headers)-1].Value += " " + strings.TrimSpace(line)
|
|
continue
|
|
}
|
|
name, value, ok := strings.Cut(line, ":")
|
|
if !ok {
|
|
// Skip lines without a colon rather than failing. Some email
|
|
// systems insert comment or separator lines in the header area.
|
|
continue
|
|
}
|
|
headers = append(headers, Header{
|
|
Name: strings.TrimSpace(name),
|
|
Value: strings.TrimSpace(value),
|
|
})
|
|
}
|
|
return headers, raw[sep+2:], nil
|
|
}
|
|
|
|
func parseRootPart(messageHeaders []Header, body []byte) (*Part, error) {
|
|
partHeaders := extractBodyHeaders(messageHeaders)
|
|
part := &Part{
|
|
PartID: "1",
|
|
Headers: append([]Header{}, partHeaders...),
|
|
}
|
|
if len(partHeaders) == 0 {
|
|
part.MediaType = "text/plain"
|
|
part.MediaParams = map[string]string{"charset": "UTF-8"}
|
|
part.TransferEncoding = "7bit"
|
|
part.Body = body
|
|
part.RawEntity = append([]byte{}, body...)
|
|
return part, nil
|
|
}
|
|
rawEntity := buildRawEntity(filterRawEntityHeaders(partHeaders), body)
|
|
return parsePart(partHeaders, body, "1", rawEntity, 0)
|
|
}
|
|
|
|
// maxMIMEDepth bounds how deeply MIME parts may nest: parsePart returns an
// error once its depth argument exceeds this value.
const maxMIMEDepth = 50
|
|
|
|
func parsePart(headers []Header, body []byte, partID string, rawEntity []byte, depth int) (*Part, error) {
|
|
if depth > maxMIMEDepth {
|
|
return nil, fmt.Errorf("MIME nesting too deep (max %d levels)", maxMIMEDepth)
|
|
}
|
|
part := &Part{
|
|
PartID: partID,
|
|
Headers: append([]Header{}, headers...),
|
|
MediaType: "text/plain",
|
|
MediaParams: map[string]string{},
|
|
ContentDispositionArg: map[string]string{},
|
|
RawEntity: append([]byte{}, rawEntity...),
|
|
}
|
|
if ct := headerValue(headers, "Content-Type"); ct != "" {
|
|
mediaType, params, err := mime.ParseMediaType(ct)
|
|
if err != nil {
|
|
// Fallback: treat as opaque binary so the part is still accessible
|
|
// and can round-trip through RawEntity. The original Content-Type
|
|
// header is preserved for serialization.
|
|
part.MediaType = "application/octet-stream"
|
|
part.EncodingProblem = true
|
|
} else {
|
|
part.MediaType = strings.ToLower(mediaType)
|
|
part.MediaParams = lowerCaseKeys(params)
|
|
}
|
|
} else {
|
|
part.MediaParams["charset"] = "UTF-8"
|
|
}
|
|
if disp := headerValue(headers, "Content-Disposition"); disp != "" {
|
|
dispType, params, err := mime.ParseMediaType(disp)
|
|
if err == nil {
|
|
part.ContentDisposition = strings.ToLower(dispType)
|
|
part.ContentDispositionArg = lowerCaseKeys(params)
|
|
}
|
|
// On parse error, silently ignore the disposition. The original
|
|
// header is preserved in part.Headers for serialization.
|
|
}
|
|
part.ContentID = strings.Trim(strings.TrimSpace(headerValue(headers, "Content-ID")), "<>")
|
|
part.TransferEncoding = strings.ToLower(strings.TrimSpace(headerValue(headers, "Content-Transfer-Encoding")))
|
|
|
|
if strings.HasPrefix(part.MediaType, "multipart/") {
|
|
boundary := part.MediaParams["boundary"]
|
|
if boundary == "" {
|
|
return nil, fmt.Errorf("multipart part %s missing boundary", partID)
|
|
}
|
|
children, preamble, epilogue, err := parseMultipartChildren(body, boundary, partID, depth)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(children) == 0 {
|
|
// Boundary declared but never found in the body. Reclassify as
|
|
// text rather than returning an empty multipart with no children
|
|
// (following mail-parser's approach per Postel's law).
|
|
part.MediaType = "text/plain"
|
|
part.MediaParams = map[string]string{"charset": "UTF-8"}
|
|
part.Body = body
|
|
part.EncodingProblem = true
|
|
return part, nil
|
|
}
|
|
part.Children = children
|
|
part.Preamble = preamble
|
|
part.Epilogue = epilogue
|
|
return part, nil
|
|
}
|
|
|
|
decoded, encodingProblem := decodePartBody(body, part.TransferEncoding, part.MediaType, part.MediaParams)
|
|
part.Body = decoded
|
|
if encodingProblem {
|
|
part.EncodingProblem = true
|
|
}
|
|
return part, nil
|
|
}
|
|
|
|
func parseMultipartChildren(body []byte, boundary, parentPartID string, depth int) ([]*Part, []byte, []byte, error) {
|
|
lines := bytes.SplitAfter(body, []byte("\n"))
|
|
startLine := "--" + boundary
|
|
endLine := "--" + boundary + "--"
|
|
|
|
var (
|
|
children []*Part
|
|
preamble bytes.Buffer
|
|
epilogue bytes.Buffer
|
|
buf bytes.Buffer
|
|
inPart bool
|
|
afterEnd bool
|
|
index int
|
|
)
|
|
|
|
flush := func() error {
|
|
// Copy buf content before Reset to avoid memory aliasing: buf.Bytes()
|
|
// returns a sub-slice of buf's internal array which gets overwritten
|
|
// when the next MIME part is written to buf after Reset.
|
|
partBytes := append([]byte{}, bytes.TrimRight(buf.Bytes(), " \t\r\n")...)
|
|
buf.Reset()
|
|
if len(partBytes) == 0 {
|
|
return nil
|
|
}
|
|
// Ensure the header/body separator (\n\n) is present so parseHeaderBlock
|
|
// can split headers from body.
|
|
//
|
|
// A part whose first byte is \n has no headers (the \n is the blank-line
|
|
// separator). Prepend an extra \n so parseHeaderBlock finds \n\n at
|
|
// position 0 and returns empty headers.
|
|
//
|
|
// Otherwise the part has headers but TrimRight may have stripped the
|
|
// trailing \n\n when the body was empty. Re-append it.
|
|
if len(partBytes) > 0 && partBytes[0] == '\n' {
|
|
partBytes = append([]byte{'\n'}, partBytes...)
|
|
} else if !bytes.Contains(partBytes, []byte("\n\n")) {
|
|
partBytes = append(partBytes, '\n', '\n')
|
|
}
|
|
index++
|
|
partID := fmt.Sprintf("%s.%d", parentPartID, index)
|
|
headers, body, err := parseHeaderBlock(partBytes)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
child, err := parsePart(headers, body, partID, partBytes, depth+1)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
children = append(children, child)
|
|
return nil
|
|
}
|
|
|
|
for _, line := range lines {
|
|
trimmed := strings.TrimSpace(string(line))
|
|
if afterEnd {
|
|
epilogue.Write(line)
|
|
continue
|
|
}
|
|
switch trimmed {
|
|
case startLine:
|
|
if inPart {
|
|
if err := flush(); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
} else {
|
|
inPart = true
|
|
continue
|
|
}
|
|
inPart = true
|
|
case endLine:
|
|
if inPart {
|
|
if err := flush(); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
}
|
|
afterEnd = true
|
|
default:
|
|
if inPart {
|
|
buf.Write(line)
|
|
} else {
|
|
preamble.Write(line)
|
|
}
|
|
}
|
|
}
|
|
if inPart {
|
|
if err := flush(); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
}
|
|
return children, preamble.Bytes(), epilogue.Bytes(), nil
|
|
}
|
|
|
|
// decodePartBody decodes the transfer-encoded body. The second return value
|
|
// indicates whether a decoding problem occurred (the returned bytes are a
|
|
// best-effort fallback in that case).
|
|
func decodePartBody(body []byte, cte, mediaType string, mediaParams map[string]string) ([]byte, bool) {
|
|
var decoded []byte
|
|
var problem bool
|
|
switch strings.ToLower(strings.TrimSpace(cte)) {
|
|
case "", "7bit", "8bit", "binary":
|
|
decoded = body
|
|
case "base64":
|
|
trimmed := strings.Join(strings.Fields(string(body)), "")
|
|
var err error
|
|
decoded, err = base64.StdEncoding.DecodeString(trimmed)
|
|
if err != nil {
|
|
// Try without padding — some implementations omit it.
|
|
decoded, err = base64.RawStdEncoding.DecodeString(trimmed)
|
|
}
|
|
if err != nil {
|
|
// Give up decoding; keep raw bytes so the part is still
|
|
// accessible and can round-trip through RawEntity.
|
|
decoded = body
|
|
problem = true
|
|
}
|
|
case "quoted-printable":
|
|
var err error
|
|
decoded, err = io.ReadAll(quotedprintable.NewReader(bytes.NewReader(body)))
|
|
if err != nil {
|
|
// Keep raw bytes on QP decode failure (same as base64 fallback).
|
|
decoded = body
|
|
problem = true
|
|
}
|
|
default:
|
|
decoded = body
|
|
}
|
|
if !isTextualMediaType(mediaType) {
|
|
return decoded, problem
|
|
}
|
|
text, err := decodeTextCharset(decoded, mediaParams["charset"])
|
|
if err != nil {
|
|
// Unsupported or malformed charsets should not break draft parsing.
|
|
// Keep the decoded transfer bytes so untouched subtrees can still round-trip raw.
|
|
return decoded, true
|
|
}
|
|
return text, problem
|
|
}
|
|
|
|
func extractBodyHeaders(headers []Header) []Header {
|
|
out := make([]Header, 0, len(headers))
|
|
for _, header := range headers {
|
|
if isBodyHeader(header.Name) {
|
|
out = append(out, header)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
func buildRawEntity(headers []Header, body []byte) []byte {
|
|
if len(headers) == 0 {
|
|
return append([]byte{}, body...)
|
|
}
|
|
var buf bytes.Buffer
|
|
for _, header := range headers {
|
|
buf.WriteString(header.Name)
|
|
buf.WriteString(": ")
|
|
buf.WriteString(header.Value)
|
|
buf.WriteByte('\n')
|
|
}
|
|
buf.WriteByte('\n')
|
|
buf.Write(body)
|
|
return buf.Bytes()
|
|
}
|
|
|
|
func filterRawEntityHeaders(headers []Header) []Header {
|
|
out := make([]Header, 0, len(headers))
|
|
for _, header := range headers {
|
|
if strings.EqualFold(header.Name, "MIME-Version") {
|
|
continue
|
|
}
|
|
out = append(out, header)
|
|
}
|
|
return out
|
|
}
|
|
|
|
func refreshSnapshot(snapshot *DraftSnapshot) error {
|
|
snapshot.Subject = decodeHeaderValue(headerValue(snapshot.Headers, "Subject"))
|
|
snapshot.MessageID = strings.TrimSpace(headerValue(snapshot.Headers, "Message-ID"))
|
|
snapshot.InReplyTo = strings.TrimSpace(headerValue(snapshot.Headers, "In-Reply-To"))
|
|
snapshot.References = strings.TrimSpace(headerValue(snapshot.Headers, "References"))
|
|
|
|
// Address headers are parsed leniently: malformed addresses (non-standard
|
|
// display names, semicolons, broken encoding) are silently ignored so that
|
|
// the draft can still be opened. The raw header values are preserved in
|
|
// snapshot.Headers for round-trip serialization.
|
|
snapshot.From, _ = parseAddressHeader(headerValue(snapshot.Headers, "From"))
|
|
snapshot.To, _ = parseAddressHeader(headerValue(snapshot.Headers, "To"))
|
|
snapshot.Cc, _ = parseAddressHeader(headerValue(snapshot.Headers, "Cc"))
|
|
snapshot.Bcc, _ = parseAddressHeader(headerValue(snapshot.Headers, "Bcc"))
|
|
snapshot.ReplyTo, _ = parseAddressHeader(headerValue(snapshot.Headers, "Reply-To"))
|
|
|
|
reindexParts(snapshot.Body, "1")
|
|
textPart := findPrimaryBodyPart(snapshot.Body, "text/plain")
|
|
htmlPart := findPrimaryBodyPart(snapshot.Body, "text/html")
|
|
if textPart != nil {
|
|
snapshot.PrimaryTextPartID = textPart.PartID
|
|
} else {
|
|
snapshot.PrimaryTextPartID = ""
|
|
}
|
|
if htmlPart != nil {
|
|
snapshot.PrimaryHTMLPartID = htmlPart.PartID
|
|
} else {
|
|
snapshot.PrimaryHTMLPartID = ""
|
|
}
|
|
// Inline CID consistency is NOT validated here — broken CID references
|
|
// should not prevent opening the draft editor. Project() already reports
|
|
// missing CIDs as warnings in DraftProjection.Warnings.
|
|
return nil
|
|
}
|
|
|
|
func parseAddressHeader(value string) ([]Address, error) {
|
|
value = strings.TrimSpace(value)
|
|
if value == "" {
|
|
return nil, nil
|
|
}
|
|
parser := &mail.AddressParser{WordDecoder: new(mime.WordDecoder)}
|
|
addrs, err := parser.ParseList(value)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
out := make([]Address, 0, len(addrs))
|
|
for _, addr := range addrs {
|
|
out = append(out, Address{
|
|
Name: addr.Name,
|
|
Address: strings.TrimSpace(addr.Address),
|
|
})
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// lowerCaseKeys returns a new map holding the entries of in with every key
// lower-cased. It returns nil when in is nil or empty.
func lowerCaseKeys(in map[string]string) map[string]string {
	var lowered map[string]string
	for key, val := range in {
		if lowered == nil {
			// Allocate lazily so an empty input yields a nil result.
			lowered = make(map[string]string, len(in))
		}
		lowered[strings.ToLower(key)] = val
	}
	return lowered
}
|
|
|
|
func headerValue(headers []Header, name string) string {
|
|
for _, header := range headers {
|
|
if strings.EqualFold(header.Name, name) {
|
|
return header.Value
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// isBodyHeader reports whether name, ignoring case and surrounding
// whitespace, is "MIME-Version" or starts with "Content-".
func isBodyHeader(name string) bool {
	normalized := strings.ToLower(strings.TrimSpace(name))
	if normalized == "mime-version" {
		return true
	}
	return strings.HasPrefix(normalized, "content-")
}
|
|
|
|
func reindexParts(part *Part, partID string) {
|
|
if part == nil {
|
|
return
|
|
}
|
|
part.PartID = partID
|
|
for i, child := range part.Children {
|
|
reindexParts(child, fmt.Sprintf("%s.%d", partID, i+1))
|
|
}
|
|
}
|
|
|
|
func findPrimaryBodyPart(root *Part, mediaType string) *Part {
|
|
var best *Part
|
|
bestScore := -1
|
|
|
|
var walk func(part *Part, ancestors []string)
|
|
walk = func(part *Part, ancestors []string) {
|
|
if part == nil {
|
|
return
|
|
}
|
|
if !part.IsMultipart() {
|
|
score, ok := bodyCandidateScore(part, ancestors, mediaType)
|
|
if ok && score > bestScore {
|
|
best = part
|
|
bestScore = score
|
|
}
|
|
return
|
|
}
|
|
nextAncestors := append(append([]string{}, ancestors...), part.MediaType)
|
|
for _, child := range part.Children {
|
|
walk(child, nextAncestors)
|
|
}
|
|
}
|
|
walk(root, nil)
|
|
return best
|
|
}
|
|
|
|
func bodyCandidateScore(part *Part, ancestors []string, mediaType string) (int, bool) {
|
|
if part == nil || !strings.EqualFold(part.MediaType, mediaType) {
|
|
return 0, false
|
|
}
|
|
switch strings.ToLower(part.ContentDisposition) {
|
|
case "attachment", "inline":
|
|
return 0, false
|
|
}
|
|
score := 1
|
|
for _, ancestor := range ancestors {
|
|
switch ancestor {
|
|
case "multipart/signed", "multipart/encrypted":
|
|
return 0, false
|
|
case "multipart/alternative":
|
|
score += 10
|
|
case "multipart/related":
|
|
if mediaType == "text/html" {
|
|
score += 5
|
|
}
|
|
}
|
|
}
|
|
return score, true
|
|
}
|