Files
vas3k-TaxHacker/lib/email-sync/ingest.test.ts
Vasily Zubarev 0bed4a6e84 feat: new app - email/smtp listener (#102)
* feat: initial email impl

* feat: IMAP email ingest (builds on the scaffold) (#100)

* chore: add imap-simple, mailparser, vitest

* feat: AES-256-GCM helpers for email credentials

* feat: extract ingestUnsortedFile helper, reuse in upload action

* chore: gitignore .worktrees/

* feat: email-sync types and pure attachment/search filters

* feat: imap-simple + mailparser client wrapper

* feat: email sync orchestration with UID watermark + status persistence

* feat: encrypt email credentials at rest, add UID/addedAt fields

* feat: real IMAP test-connection, scoped sync-now, thin cron entry

* docs: update email app README to match real IMAP/encryption/UID behavior

* fix: nest SINCE search criteria and guard missing addedAt for first-run sync

* fix: show last-sync time and error detail from sync in server card

* fix: skip storage recompute when no attachments ingested

Avoids an ENOENT crash on first sync when the user's uploads dir does not exist yet and nothing was ingested; this was also masking the real per-server error. Adds regression tests for the guard.

* feat: configurable initial-grab window (fetch-since date)

First sync is bounded by a user-chosen 'Fetch emails since' date instead of the server's addedAt; blank = entire mailbox (IMAP ALL). The UID watermark takes over after the first run.

* fix: add missing @langchain/core dependency

@langchain/core is only a peer dep of the @langchain/* packages and was not installed on a clean npm install, breaking the build (e.g. /unsorted via ai/analyze).

* fix: harden email sync — UID dedup guard, locked status write, graceful decrypt, scrypt memo

Addresses review findings: skip messages at/below the UID watermark (defends against the IMAP `n:*` re-fetch quirk); lock the app_data row with SELECT FOR UPDATE so concurrent cron/manual syncs can't clobber each other; return a friendly error when a stored password can't be decrypted (e.g. after BETTER_AUTH_SECRET rotation) and document the coupling; memoize the scrypt-derived key.

* feat: enforce per-server syncInterval on cron; skip non-Buffer attachments

The cron now honors each server's syncInterval (manual Sync Now bypasses the throttle), so the configured interval is no longer ignored. Attachments whose parsed content is not a Buffer are skipped instead of throwing on .length. Adds throttle regression tests.

* refactor: remove dead lastProcessedMessageId field; clarify cron throttle in README

lastProcessedMessageId was superseded by the lastProcessedUid watermark and never read; dropped from the type and form state. README now describes the per-server interval as an app-level throttle (manual Sync Now bypasses).

* feat(email): UI-selectable sync frequency + working cron heartbeat

Replace the per-server sync-interval number input with a dropdown of
presets (15m/30m/hourly/6h/12h/daily). Switch the stored unit from hours
to minutes and update the throttle accordingly.

Make the cron actually run: heartbeat now fires every 5 minutes as the
resolution floor while each mailbox's UI frequency gates real fetches.
Propagate env into cron jobs via /etc/cron.env (cron strips the
environment) and add BETTER_AUTH_SECRET to the email-sync service in the
dev/build compose files so stored passwords can be decrypted.

* fix(email): reset Add Server dialog to provider selection on close

Radix's onOpenChange only toggled isOpen, so closing the dialog via Esc,
overlay click, or the X left the step/selectedProvider state intact.
Reopening then jumped straight to the previous provider's config form
instead of the provider-selection screen. Route every close through
handleClose() to reset the step.

---------

Co-authored-by: Evgenii Burmakin <Freika@users.noreply.github.com>
2026-06-18 23:30:38 +02:00

167 lines
7.3 KiB
TypeScript

import { beforeAll, beforeEach, describe, expect, it, vi } from "vitest"
// --- import-time mocks so importing ./ingest doesn't run config Zod validation or create a Prisma client ---
// Each factory below swaps a module for vi.fn() stubs; individual tests configure those stubs through vi.mocked(...).
vi.mock("@/lib/uploads", () => ({ ingestUnsortedFile: vi.fn() }))
vi.mock("@/lib/db", () => {
// applyResult now locks + re-reads inside a transaction; provide a tx with $queryRaw + update.
// $queryRaw resolves to a single row whose data holds an empty servers list, and
// $transaction just invokes the callback it is given with this fake tx.
const tx = { $queryRaw: vi.fn(async () => [{ data: { servers: [] } }]), appData: { update: vi.fn() } }
return {
prisma: { appData: { findMany: vi.fn() }, $transaction: vi.fn(async (fn: any) => fn(tx)) },
}
})
// getUserUploadsDirectory always returns the placeholder "dir"; getDirectorySize is a bare stub
// that the runEmailSync tests configure and inspect.
vi.mock("@/lib/files", () => ({ getDirectorySize: vi.fn(), getUserUploadsDirectory: vi.fn(() => "dir") }))
vi.mock("@/models/users", () => ({ updateUser: vi.fn() }))
// Stubbed realImapClient; the runEmailSync tests set its fetchMessages result per test.
vi.mock("@/lib/email-sync/imap-client", () => ({ realImapClient: { fetchMessages: vi.fn() } }))
// Module under test, loaded with a dynamic import.
// NOTE(review): presumably this guarantees ./ingest resolves after the mocks above are registered — confirm against Vitest's vi.mock hoisting rules.
const { syncServer, runEmailSync } = await import("./ingest")
import { prisma } from "@/lib/db"
import { realImapClient } from "@/lib/email-sync/imap-client"
import { getDirectorySize } from "@/lib/files"
import { ingestUnsortedFile } from "@/lib/uploads"
import { ImapClient, ImapMessage } from "./types"
// Set a fixed BETTER_AUTH_SECRET once before any test in this file runs.
// NOTE(review): presumably the credential encryption helpers derive their key from it — confirm in the crypto module.
beforeAll(() => {
  const testSecret = "test-secret-key-for-encryption-unit-tests"
  process.env["BETTER_AUTH_SECRET"] = testSecret
})
// Minimal user fixture shared by every test; cast to `any` because only these four fields are supplied.
// NOTE(review): storageLimit -1 presumably means "no limit" — confirm against the user model.
const user = {
  id: "user-1",
  email: "u@example.com",
  storageUsed: 0,
  storageLimit: -1,
} as any
/**
 * Builds an email-server fixture for the tests.
 *
 * A fresh set of defaults is created on every call, then any keys in
 * `overrides` are layered on top (replacing defaults or adding new fields
 * such as `lastProcessedUid` / `lastSyncedAt`).
 *
 * @param overrides - Fields to replace or add on top of the defaults.
 * @returns A new plain object combining the defaults with `overrides`.
 */
function makeServer(overrides: any = {}) {
  const defaults = {
    id: "srv-1",
    name: "Inbox",
    provider: "custom",
    host: "imap.example.com",
    port: 993,
    username: "u@example.com",
    password: "plaintext-pw",
    useSSL: true,
    isActive: true,
    status: "pending",
    allowedExtensions: [".pdf"],
    syncInterval: 1,
    addedAt: "2026-06-01T00:00:00.000Z",
  }
  // Overrides are spread last so they win over the defaults.
  return { ...defaults, ...overrides }
}
/**
 * Creates an in-memory ImapClient whose `fetchMessages` mock always resolves
 * to the supplied messages, whatever arguments it is called with.
 *
 * @param messages - The messages every fetch should yield.
 * @returns An ImapClient backed by a single vi.fn() mock.
 */
function fakeClient(messages: ImapMessage[]): ImapClient {
  const fetchMessages = vi.fn(async () => messages)
  return { fetchMessages }
}
describe("syncServer", () => {
  // Builds a PDF attachment fixture; `size` is the byte length of the generated content.
  const pdf = (filename: string, body: string) => {
    const content = Buffer.from(body)
    return { filename, contentType: "application/pdf", content, size: content.length }
  }

  // Ingest stub for tests where the ingest result is irrelevant.
  const noopIngest = async () => ({}) as any

  it("ingests matching attachments and advances the UID watermark", async () => {
    const captured: any[] = []
    // First message mixes an allowed .pdf with a .gif that is not in allowedExtensions.
    const invoiceMail: ImapMessage = {
      uid: 10,
      messageId: "<a@x>",
      subject: "Invoice",
      from: "biz@x.com",
      date: new Date(),
      attachments: [
        pdf("invoice.pdf", "pdf"),
        { filename: "logo.gif", contentType: "image/gif", content: Buffer.from("gif"), size: 3 },
      ],
    }
    const receiptMail: ImapMessage = { uid: 12, attachments: [pdf("receipt.pdf", "r")] }
    const messages: ImapMessage[] = [invoiceMail, receiptMail]

    const result = await syncServer(makeServer(), user, {
      client: fakeClient(messages),
      ingest: async (_u, input) => {
        captured.push(input)
        return { id: "f", ...input } as any
      },
    })

    const ingestedNames = captured.map((entry) => entry.filename)
    expect(ingestedNames).toEqual(["invoice.pdf", "receipt.pdf"])
    expect(result.processed).toBe(2)
    expect(result.lastProcessedUid).toBe(12)
    expect(result.status).toBe("connected")
  })

  it("does not advance the watermark when there are no new messages", async () => {
    const server = makeServer({ lastProcessedUid: 5 })

    const result = await syncServer(server, user, {
      client: fakeClient([]),
      ingest: noopIngest,
    })

    expect(result.processed).toBe(0)
    expect(result.lastProcessedUid).toBe(5)
    expect(result.status).toBe("connected")
  })

  it("skips messages at or below the watermark (defends against the IMAP `UID n:*` re-fetch quirk)", async () => {
    const captured: any[] = []
    // Some servers (Gmail/Dovecot) return the highest existing UID for `UID (last+1):*`
    // when there is no newer mail — re-delivering the watermark message.
    const server = makeServer({ lastProcessedUid: 603 })
    const redelivered: ImapMessage[] = [{ uid: 603, attachments: [pdf("dup.pdf", "x")] }]

    const result = await syncServer(server, user, {
      client: fakeClient(redelivered),
      ingest: async (_u, input) => {
        captured.push(input)
        return { id: "f" } as any
      },
    })

    expect(captured).toHaveLength(0)
    expect(result.processed).toBe(0)
    expect(result.lastProcessedUid).toBe(603)
  })

  it("returns a friendly error when the stored password cannot be decrypted", async () => {
    // Password shaped like a versioned ciphertext but with bogus segments.
    const server = makeServer({ password: "v1:bad:bad:bad", lastProcessedUid: 9 })

    const result = await syncServer(server, user, {
      client: fakeClient([]),
      ingest: noopIngest,
    })

    expect(result.status).toBe("error")
    expect(result.errorMessage).toMatch(/could not be decrypted/i)
    expect(result.lastProcessedUid).toBe(9)
  })

  it("reports error status and keeps the old watermark on client failure", async () => {
    const server = makeServer({ lastProcessedUid: 7 })
    // Client whose fetch always rejects.
    const failingFetch = vi.fn(async () => {
      throw new Error("auth failed")
    })

    const result = await syncServer(server, user, {
      client: { fetchMessages: failingFetch },
      ingest: noopIngest,
    })

    expect(result.status).toBe("error")
    expect(result.errorMessage).toContain("auth failed")
    expect(result.lastProcessedUid).toBe(7)
  })
})
describe("runEmailSync storage recompute guard", () => {
  // Makes prisma.appData.findMany resolve to one app-data row (user "u1") owning `servers`.
  const seedServers = (servers: any[]) =>
    vi.mocked(prisma.appData.findMany).mockResolvedValue([{ userId: "u1", user, data: { servers } }] as any)

  // Typed handle on the mocked production IMAP fetch.
  const imapFetch = () => vi.mocked(realImapClient.fetchMessages)

  beforeEach(() => {
    // Clear recorded calls from the previous test, then seed the default single-server fixture.
    vi.clearAllMocks()
    seedServers([makeServer()])
  })

  it("skips getDirectorySize when nothing was ingested (regression: ENOENT on missing uploads dir)", async () => {
    imapFetch().mockResolvedValue([]) // 0 attachments

    await runEmailSync()

    expect(getDirectorySize).not.toHaveBeenCalled()
  })

  it("recomputes storage when at least one attachment was ingested", async () => {
    imapFetch().mockResolvedValue([
      {
        uid: 20,
        attachments: [{ filename: "a.pdf", contentType: "application/pdf", content: Buffer.from("x"), size: 1 }],
      },
    ])
    vi.mocked(ingestUnsortedFile).mockResolvedValue({ id: "f" } as any)
    vi.mocked(getDirectorySize).mockResolvedValue(123)

    await runEmailSync()

    expect(getDirectorySize).toHaveBeenCalledTimes(1)
  })

  it("cron run (respectInterval) skips a server still within its syncInterval", async () => {
    // Synced "just now" with an interval of 6: still inside the throttle window.
    const justSynced = makeServer({ lastSyncedAt: new Date().toISOString(), syncInterval: 6 })
    seedServers([justSynced])

    const results = await runEmailSync({ respectInterval: true })

    expect(realImapClient.fetchMessages).not.toHaveBeenCalled()
    expect(results).toHaveLength(0)
  })

  it("treats syncInterval as MINUTES: a server synced 90 min ago with interval 60 is not throttled", async () => {
    const ninetyMinutesAgo = new Date(Date.now() - 90 * 60_000).toISOString()
    seedServers([makeServer({ lastSyncedAt: ninetyMinutesAgo, syncInterval: 60 })])
    imapFetch().mockResolvedValue([])

    await runEmailSync({ respectInterval: true })

    expect(realImapClient.fetchMessages).toHaveBeenCalledTimes(1)
  })

  it("manual sync (no respectInterval) bypasses the interval throttle", async () => {
    // Same "just synced" server as the cron test, but respectInterval is not passed.
    const justSynced = makeServer({ lastSyncedAt: new Date().toISOString(), syncInterval: 6 })
    seedServers([justSynced])
    imapFetch().mockResolvedValue([])

    await runEmailSync({ userId: "u1" })

    expect(realImapClient.fetchMessages).toHaveBeenCalledTimes(1)
  })
})